You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/08/14 17:15:11 UTC
[arrow] branch master updated: ARROW-3036: [Go] implement
array.NewSlice
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1810db2 ARROW-3036: [Go] implement array.NewSlice
1810db2 is described below
commit 1810db2bf4bc90f8b53d3293c85f02e315f1658a
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Tue Aug 14 13:15:02 2018 -0400
ARROW-3036: [Go] implement array.NewSlice
This CL implements the ability to slice a given array.Interface,
i.e. the Go equivalent of:
```
vs := []float64{1, 2, 3, 4}
sub := vs[1:3]
```
also, to support sub-slicing, offsets' support had to be implemented inside the `array.Data` and nullbitmap types.
namely, the `arrow/internal/bitutil.CountSetBits` function had to be modified to support counting bits from an arbitrary offset.
this resulted in a minor (?) speed-bump:
```
name old time/op new time/op delta
CountSetBits_3-8 8.16ns ± 1% 8.18ns ± 0% +0.31% (p=0.000 n=19+18)
CountSetBits_32-8 7.62ns ± 1% 7.79ns ± 1% +2.24% (p=0.000 n=18+18)
CountSetBits_128-8 6.80ns ± 1% 7.03ns ± 1% +3.40% (p=0.000 n=18+19)
CountSetBits_1000-8 18.7ns ± 0% 17.8ns ± 1% -4.92% (p=0.000 n=16+20)
CountSetBits_1024-8 16.1ns ± 0% 16.1ns ± 1% +0.44% (p=0.000 n=20+20)
name old alloc/op new alloc/op delta
CountSetBits_3-8 0.00B 0.00B ~ (all equal)
CountSetBits_32-8 0.00B 0.00B ~ (all equal)
CountSetBits_128-8 0.00B 0.00B ~ (all equal)
CountSetBits_1000-8 0.00B 0.00B ~ (all equal)
CountSetBits_1024-8 0.00B 0.00B ~ (all equal)
name old allocs/op new allocs/op delta
CountSetBits_3-8 0.00 0.00 ~ (all equal)
CountSetBits_32-8 0.00 0.00 ~ (all equal)
CountSetBits_128-8 0.00 0.00 ~ (all equal)
CountSetBits_1000-8 0.00 0.00 ~ (all equal)
CountSetBits_1024-8 0.00 0.00 ~ (all equal)
```
I believe some of that speed bump could be recoup'd (see `FIXME` in `go/arrow/internal/bitutil/bitutil.go`)
@stuartcarnie PTAL.
needs https://github.com/apache/arrow/pull/2412.
Author: Sebastien Binet <bi...@cern.ch>
Closes #2419 from sbinet/issue-3036 and squashes the following commits:
547c6096 <Sebastien Binet> ARROW-3036: implement array.NewSlice
767c281a <Sebastien Binet> add support for nullbitmap with offset
cb6cdbba <Sebastien Binet> consolidate List array
---
go/arrow/array/array.go | 19 +++-
go/arrow/array/array_test.go | 156 +++++++++++++++++++++++++--
go/arrow/array/binarybuilder.go | 2 +-
go/arrow/array/boolean.go | 2 +-
go/arrow/array/booleanbuilder.go | 2 +-
go/arrow/array/data.go | 45 +++++++-
go/arrow/array/list.go | 151 ++++++++++++++++++++++++++
go/arrow/array/listbuilder.go | 170 ------------------------------
go/arrow/array/numeric.gen.go | 33 ++++++
go/arrow/array/numeric.gen.go.tmpl | 5 +-
go/arrow/array/numeric_test.go | 102 +++++++++++++++++-
go/arrow/array/numericbuilder.gen.go | 22 ++--
go/arrow/array/numericbuilder.gen.go.tmpl | 4 +-
go/arrow/array/struct.go | 1 +
go/arrow/example_test.go | 32 ++++++
go/arrow/internal/bitutil/bitutil.go | 55 +++++++++-
go/arrow/internal/bitutil/bitutil_test.go | 101 +++++++++++++++---
17 files changed, 686 insertions(+), 216 deletions(-)
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index f45bec4..9b49ea0 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -93,7 +93,7 @@ func (a *array) DataType() arrow.DataType { return a.data.dtype }
// NullN returns the number of null values in the array.
func (a *array) NullN() int {
if a.data.nulls < 0 {
- a.data.nulls = a.data.length - bitutil.CountSetBits(a.nullBitmapBytes, a.data.length)
+ a.data.nulls = a.data.length - bitutil.CountSetBits(a.nullBitmapBytes, a.data.offset, a.data.length)
}
return a.data.nulls
}
@@ -109,13 +109,13 @@ func (a *array) Len() int { return a.data.length }
// IsNull returns true if value at index is null.
// NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len.
func (a *array) IsNull(i int) bool {
- return len(a.nullBitmapBytes) != 0 && bitutil.BitIsNotSet(a.nullBitmapBytes, i)
+ return len(a.nullBitmapBytes) != 0 && bitutil.BitIsNotSet(a.nullBitmapBytes, a.data.offset+i)
}
// IsValid returns true if value at index is not null.
// NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len.
func (a *array) IsValid(i int) bool {
- return len(a.nullBitmapBytes) == 0 || bitutil.BitIsSet(a.nullBitmapBytes, i)
+ return len(a.nullBitmapBytes) == 0 || bitutil.BitIsSet(a.nullBitmapBytes, a.data.offset+i)
}
func (a *array) setData(data *Data) {
@@ -184,6 +184,19 @@ func MakeFromData(data *Data) Interface {
return makeArrayFn[byte(data.dtype.ID()&0x1f)](data)
}
+// NewSlice constructs a zero-copy slice of the array with the indicated
+// indices i and j, corresponding to array[i:j].
+// The returned array must be Release()'d after use.
+//
+// NewSlice panics if the slice is outside the valid range of the input array.
+// NewSlice panics if j < i.
+func NewSlice(arr Interface, i, j int64) Interface {
+ data := NewSliceData(arr.Data(), i, j)
+ slice := MakeFromData(data)
+ data.Release()
+ return slice
+}
+
func init() {
makeArrayFn[arrow.LIST] = func(data *Data) Interface { return NewListData(data) }
makeArrayFn[arrow.STRUCT] = func(data *Data) Interface { return NewStructData(data) }
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index b442c76..5c82709 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -62,14 +62,14 @@ func TestMakeFromData(t *testing.T) {
{name: "timestamp", d: &testDataType{arrow.TIMESTAMP}},
{name: "list", d: &testDataType{arrow.LIST}, child: []*array.Data{
- array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
- array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
}},
{name: "struct", d: &testDataType{arrow.STRUCT}},
{name: "struct", d: &testDataType{arrow.STRUCT}, child: []*array.Data{
- array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
- array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
+ array.NewData(&testDataType{arrow.INT64}, 0, make([]*memory.Buffer, 4), nil, 0, 0),
}},
// invalid types
@@ -83,7 +83,7 @@ func TestMakeFromData(t *testing.T) {
if test.size != 0 {
n = test.size
}
- data := array.NewData(test.d, 0, b[:n], test.child, 0)
+ data := array.NewData(test.d, 0, b[:n], test.child, 0, 0)
if test.expPanic {
assert.PanicsWithValue(t, test.expError, func() {
@@ -116,7 +116,7 @@ func TestArray_NullN(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
buf := memory.NewBufferBytes(test.bm)
- data := array.NewData(arrow.FixedWidthTypes.Boolean, test.l, []*memory.Buffer{buf, nil}, nil, test.n)
+ data := array.NewData(arrow.FixedWidthTypes.Boolean, test.l, []*memory.Buffer{buf, nil}, nil, test.n, 0)
buf.Release()
ar := array.MakeFromData(data)
data.Release()
@@ -126,3 +126,147 @@ func TestArray_NullN(t *testing.T) {
})
}
}
+
+func TestArraySlice(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ var (
+ valids = []bool{true, true, true, false, true, true}
+ vs = []float64{1, 2, 3, 0, 4, 5}
+ )
+
+ b := array.NewFloat64Builder(pool)
+ defer b.Release()
+
+ for _, tc := range []struct {
+ i, j int
+ panics bool
+ len int
+ }{
+ {i: 0, j: len(valids), panics: false, len: len(valids)},
+ {i: len(valids), j: len(valids), panics: false, len: 0},
+ {i: 0, j: 1, panics: false, len: 1},
+ {i: 1, j: 1, panics: false, len: 0},
+ {i: 0, j: len(valids) + 1, panics: true},
+ {i: 2, j: 1, panics: true},
+ {i: len(valids) + 1, j: len(valids) + 1, panics: true},
+ } {
+ t.Run("", func(t *testing.T) {
+ b.AppendValues(vs, valids)
+
+ arr := b.NewFloat64Array()
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(valids); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if tc.panics {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("this should have panicked, but did not")
+ }
+ }()
+ }
+
+ slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64)
+ defer slice.Release()
+
+ if got, want := slice.Len(), tc.len; got != want {
+ t.Fatalf("invalid slice length: got=%d, want=%d", got, want)
+ }
+ })
+ }
+}
+
+func TestArraySliceTypes(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ valids := []bool{true, true, true, false, true, true}
+
+ for _, tc := range []struct {
+ values interface{}
+ builder array.Builder
+ append func(b array.Builder, vs interface{})
+ }{
+ {
+ values: []bool{true, false, true, false, true, false},
+ builder: array.NewBooleanBuilder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) },
+ },
+ {
+ values: []uint8{1, 2, 3, 0, 4, 5},
+ builder: array.NewUint8Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) },
+ },
+ {
+ values: []uint16{1, 2, 3, 0, 4, 5},
+ builder: array.NewUint16Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) },
+ },
+ {
+ values: []uint32{1, 2, 3, 0, 4, 5},
+ builder: array.NewUint32Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) },
+ },
+ {
+ values: []uint64{1, 2, 3, 0, 4, 5},
+ builder: array.NewUint64Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) },
+ },
+ {
+ values: []int8{1, 2, 3, 0, 4, 5},
+ builder: array.NewInt8Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) },
+ },
+ {
+ values: []int16{1, 2, 3, 0, 4, 5},
+ builder: array.NewInt16Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) },
+ },
+ {
+ values: []int32{1, 2, 3, 0, 4, 5},
+ builder: array.NewInt32Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) },
+ },
+ {
+ values: []int64{1, 2, 3, 0, 4, 5},
+ builder: array.NewInt64Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) },
+ },
+ {
+ values: []float32{1, 2, 3, 0, 4, 5},
+ builder: array.NewFloat32Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) },
+ },
+ {
+ values: []float64{1, 2, 3, 0, 4, 5},
+ builder: array.NewFloat64Builder(pool),
+ append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) },
+ },
+ } {
+ t.Run("", func(t *testing.T) {
+ defer tc.builder.Release()
+
+ b := tc.builder
+ tc.append(b, tc.values)
+
+ arr := b.NewArray()
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(valids); got != want {
+ t.Fatalf("invalid length: got=%d, want=%d", got, want)
+ }
+
+ slice := array.NewSlice(arr, 2, 5)
+ defer slice.Release()
+
+ if got, want := slice.Len(), 3; got != want {
+ t.Fatalf("invalid slice length: got=%d, want=%d", got, want)
+ }
+ })
+ }
+}
diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go
index 8534d86..d32d8d1 100644
--- a/go/arrow/array/binarybuilder.go
+++ b/go/arrow/array/binarybuilder.go
@@ -163,7 +163,7 @@ func (b *BinaryBuilder) NewBinaryArray() (a *Binary) {
func (b *BinaryBuilder) newData() (data *Data) {
b.appendNextOffset()
offsets, values := b.offsets.Finish(), b.values.Finish()
- data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls)
+ data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0)
if offsets != nil {
offsets.Release()
}
diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go
index 49468e5..423fcbb 100644
--- a/go/arrow/array/boolean.go
+++ b/go/arrow/array/boolean.go
@@ -32,7 +32,7 @@ type Boolean struct {
// The nullBitmap buffer can be nil of there are no null values.
// If nulls is not known, use UnknownNullCount to calculate the value of NullN at runtime from the nullBitmap buffer.
func NewBoolean(length int, data *memory.Buffer, nullBitmap *memory.Buffer, nulls int) *Boolean {
- return NewBooleanData(NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nullBitmap, data}, nil, nulls))
+ return NewBooleanData(NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nullBitmap, data}, nil, nulls, 0))
}
func NewBooleanData(data *Data) *Boolean {
diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go
index 89d38f5..28baa9b 100644
--- a/go/arrow/array/booleanbuilder.go
+++ b/go/arrow/array/booleanbuilder.go
@@ -144,7 +144,7 @@ func (b *BooleanBuilder) newData() *Data {
// trim buffers
b.data.Resize(bytesRequired)
}
- res := NewData(arrow.FixedWidthTypes.Boolean, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ res := NewData(arrow.FixedWidthTypes.Boolean, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go
index 72a88ed..f4dea17 100644
--- a/go/arrow/array/data.go
+++ b/go/arrow/array/data.go
@@ -29,12 +29,13 @@ type Data struct {
refCount int64
dtype arrow.DataType
nulls int
+ offset int
length int
buffers []*memory.Buffer // TODO(sgc): should this be an interface?
childData []*Data // TODO(sgc): managed by ListArray, StructArray and UnionArray types
}
-func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls int) *Data {
+func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls, offset int) *Data {
for _, b := range buffers {
if b != nil {
b.Retain()
@@ -52,6 +53,7 @@ func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childDa
dtype: dtype,
nulls: nulls,
length: length,
+ offset: offset,
buffers: buffers,
childData: childData,
}
@@ -86,3 +88,44 @@ func (d *Data) Release() {
func (d *Data) DataType() arrow.DataType { return d.dtype }
func (d *Data) NullN() int { return d.nulls }
func (d *Data) Len() int { return d.length }
+
+// NewSliceData returns a new slice that shares backing data with the input.
+// The returned Data slice starts at i and extends j-i elements, such as:
+// slice := data[i:j]
+// The returned value must be Release'd after use.
+//
+// NewSliceData panics if the slice is outside the valid range of the input Data.
+// NewSliceData panics if j < i.
+func NewSliceData(data *Data, i, j int64) *Data {
+ if j > int64(data.length) || i > j || data.offset+int(i) > data.length {
+ panic("arrow/array: index out of range")
+ }
+
+ for _, b := range data.buffers {
+ if b != nil {
+ b.Retain()
+ }
+ }
+
+ for _, child := range data.childData {
+ if child != nil {
+ child.Retain()
+ }
+ }
+
+ o := &Data{
+ refCount: 1,
+ dtype: data.dtype,
+ nulls: UnknownNullCount,
+ length: int(j - i),
+ offset: data.offset + int(i),
+ buffers: data.buffers,
+ childData: data.childData,
+ }
+
+ if data.nulls == 0 {
+ o.nulls = 0
+ }
+
+ return o
+}
diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go
index 322df02..1c4c01f 100644
--- a/go/arrow/array/list.go
+++ b/go/arrow/array/list.go
@@ -17,7 +17,12 @@
package array
import (
+ "sync/atomic"
+
"github.com/apache/arrow/go/arrow"
+ "github.com/apache/arrow/go/arrow/internal/bitutil"
+ "github.com/apache/arrow/go/arrow/internal/debug"
+ "github.com/apache/arrow/go/arrow/memory"
)
// List represents an immutable sequence of array values.
@@ -56,6 +61,152 @@ func (a *List) Release() {
a.values.Release()
}
+type ListBuilder struct {
+ builder
+
+ etype arrow.DataType // data type of the list's elements.
+ values Builder // value builder for the list's elements.
+ offsets *Int32Builder
+}
+
+// NewListBuilder returns a builder, using the provided memory allocator.
+// The created list builder will create a list whose elements will be of type etype.
+func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder {
+ return &ListBuilder{
+ builder: builder{refCount: 1, mem: mem},
+ etype: etype,
+ values: newBuilder(mem, etype),
+ offsets: NewInt32Builder(mem),
+ }
+}
+
+// Release decreases the reference count by 1.
+// When the reference count goes to zero, the memory is freed.
+func (b *ListBuilder) Release() {
+ debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
+
+ if atomic.AddInt64(&b.refCount, -1) == 0 {
+ if b.nullBitmap != nil {
+ b.nullBitmap.Release()
+ b.nullBitmap = nil
+ }
+ }
+
+ b.values.Release()
+ b.offsets.Release()
+}
+
+func (b *ListBuilder) appendNextOffset() {
+ b.offsets.Append(int32(b.values.Len()))
+}
+
+func (b *ListBuilder) Append(v bool) {
+ b.Reserve(1)
+ b.unsafeAppendBoolToBitmap(v)
+ b.appendNextOffset()
+}
+
+func (b *ListBuilder) AppendNull() {
+ b.Reserve(1)
+ b.unsafeAppendBoolToBitmap(false)
+ b.appendNextOffset()
+}
+
+func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) {
+ b.Reserve(len(valid))
+ b.offsets.AppendValues(offsets, nil)
+ b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
+}
+
+func (b *ListBuilder) unsafeAppend(v bool) {
+ bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+ b.length++
+}
+
+func (b *ListBuilder) unsafeAppendBoolToBitmap(isValid bool) {
+ if isValid {
+ bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
+ } else {
+ b.nulls++
+ }
+ b.length++
+}
+
+func (b *ListBuilder) init(capacity int) {
+ b.builder.init(capacity)
+ b.offsets.init(capacity + 1)
+}
+
+// Reserve ensures there is enough space for appending n elements
+// by checking the capacity and calling Resize if necessary.
+func (b *ListBuilder) Reserve(n int) {
+ b.builder.reserve(n, b.Resize)
+}
+
+// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
+// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
+func (b *ListBuilder) Resize(n int) {
+ if n < minBuilderCapacity {
+ n = minBuilderCapacity
+ }
+
+ if b.capacity == 0 {
+ b.init(n)
+ } else {
+ b.builder.resize(n, b.builder.init)
+ b.offsets.resize(n+1, b.offsets.init)
+ }
+}
+
+func (b *ListBuilder) ValueBuilder() Builder {
+ return b.values
+}
+
+// NewArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
+// so it can be used to build a new array.
+func (b *ListBuilder) NewArray() Interface {
+ return b.NewListArray()
+}
+
+// NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
+// so it can be used to build a new array.
+func (b *ListBuilder) NewListArray() (a *List) {
+ if b.offsets.Len() != b.length+1 {
+ b.appendNextOffset()
+ }
+ data := b.newData()
+ a = NewListData(data)
+ data.Release()
+ return
+}
+
+func (b *ListBuilder) newData() (data *Data) {
+ values := b.values.NewArray()
+ defer values.Release()
+
+ var offsets *memory.Buffer
+ if b.offsets != nil {
+ arr := b.offsets.NewInt32Array()
+ defer arr.Release()
+ offsets = arr.Data().buffers[1]
+ }
+
+ data = NewData(
+ arrow.ListOf(b.etype), b.length,
+ []*memory.Buffer{
+ b.nullBitmap,
+ offsets,
+ },
+ []*Data{values.Data()},
+ b.nulls,
+ 0,
+ )
+ b.reset()
+
+ return
+}
+
var (
_ Interface = (*List)(nil)
+ _ Builder = (*ListBuilder)(nil)
)
diff --git a/go/arrow/array/listbuilder.go b/go/arrow/array/listbuilder.go
deleted file mode 100644
index 2bef68a..0000000
--- a/go/arrow/array/listbuilder.go
+++ /dev/null
@@ -1,170 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package array
-
-import (
- "sync/atomic"
-
- "github.com/apache/arrow/go/arrow"
- "github.com/apache/arrow/go/arrow/internal/bitutil"
- "github.com/apache/arrow/go/arrow/internal/debug"
- "github.com/apache/arrow/go/arrow/memory"
-)
-
-type ListBuilder struct {
- builder
-
- etype arrow.DataType // data type of the list's elements.
- values Builder // value builder for the list's elements.
- offsets *Int32Builder
-}
-
-// NewListBuilder returns a builder, using the provided memory allocator.
-// The created list builder will create a list whose elements will be of type etype.
-func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder {
- return &ListBuilder{
- builder: builder{refCount: 1, mem: mem},
- etype: etype,
- values: newBuilder(mem, etype),
- offsets: NewInt32Builder(mem),
- }
-}
-
-// Release decreases the reference count by 1.
-// When the reference count goes to zero, the memory is freed.
-func (b *ListBuilder) Release() {
- debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases")
-
- if atomic.AddInt64(&b.refCount, -1) == 0 {
- if b.nullBitmap != nil {
- b.nullBitmap.Release()
- b.nullBitmap = nil
- }
- }
-
- b.values.Release()
- b.offsets.Release()
-}
-
-func (b *ListBuilder) appendNextOffset() {
- b.offsets.Append(int32(b.values.Len()))
-}
-
-func (b *ListBuilder) Append(v bool) {
- b.Reserve(1)
- b.unsafeAppendBoolToBitmap(v)
- b.appendNextOffset()
-}
-
-func (b *ListBuilder) AppendNull() {
- b.Reserve(1)
- b.unsafeAppendBoolToBitmap(false)
- b.appendNextOffset()
-}
-
-func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) {
- b.Reserve(len(valid))
- b.offsets.AppendValues(offsets, nil)
- b.builder.unsafeAppendBoolsToBitmap(valid, len(valid))
-}
-
-func (b *ListBuilder) unsafeAppend(v bool) {
- bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
- b.length++
-}
-
-func (b *ListBuilder) unsafeAppendBoolToBitmap(isValid bool) {
- if isValid {
- bitutil.SetBit(b.nullBitmap.Bytes(), b.length)
- } else {
- b.nulls++
- }
- b.length++
-}
-
-func (b *ListBuilder) init(capacity int) {
- b.builder.init(capacity)
- b.offsets.init(capacity + 1)
-}
-
-// Reserve ensures there is enough space for appending n elements
-// by checking the capacity and calling Resize if necessary.
-func (b *ListBuilder) Reserve(n int) {
- b.builder.reserve(n, b.Resize)
-}
-
-// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
-// additional memory will be allocated. If n is smaller, the allocated memory may reduced.
-func (b *ListBuilder) Resize(n int) {
- if n < minBuilderCapacity {
- n = minBuilderCapacity
- }
-
- if b.capacity == 0 {
- b.init(n)
- } else {
- b.builder.resize(n, b.builder.init)
- b.offsets.resize(n+1, b.offsets.init)
- }
-}
-
-func (b *ListBuilder) ValueBuilder() Builder {
- return b.values
-}
-
-// NewArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
-// so it can be used to build a new array.
-func (b *ListBuilder) NewArray() Interface {
- return b.NewListArray()
-}
-
-// NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder
-// so it can be used to build a new array.
-func (b *ListBuilder) NewListArray() (a *List) {
- if b.offsets.Len() != b.length+1 {
- b.appendNextOffset()
- }
- data := b.newData()
- a = NewListData(data)
- data.Release()
- return
-}
-
-func (b *ListBuilder) newData() (data *Data) {
- values := b.values.NewArray()
- defer values.Release()
-
- var offsets *memory.Buffer
- if b.offsets != nil {
- arr := b.offsets.NewInt32Array()
- defer arr.Release()
- offsets = arr.Data().buffers[1]
- }
-
- data = NewData(
- arrow.ListOf(b.etype), b.length,
- []*memory.Buffer{
- b.nullBitmap,
- offsets,
- },
- []*Data{values.Data()},
- b.nulls,
- )
- b.reset()
-
- return
-}
diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go
index 02903f9..22da6ce 100644
--- a/go/arrow/array/numeric.gen.go
+++ b/go/arrow/array/numeric.gen.go
@@ -43,6 +43,9 @@ func (a *Int64) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Int64Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -67,6 +70,9 @@ func (a *Uint64) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Uint64Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -91,6 +97,9 @@ func (a *Float64) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Float64Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -115,6 +124,9 @@ func (a *Int32) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Int32Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -139,6 +151,9 @@ func (a *Uint32) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Uint32Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -163,6 +178,9 @@ func (a *Float32) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Float32Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -187,6 +205,9 @@ func (a *Int16) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Int16Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -211,6 +232,9 @@ func (a *Uint16) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Uint16Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -235,6 +259,9 @@ func (a *Int8) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Int8Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -259,6 +286,9 @@ func (a *Uint8) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.Uint8Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
@@ -283,5 +313,8 @@ func (a *Timestamp) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl
index 5c33a36..5aa3f71 100644
--- a/go/arrow/array/numeric.gen.go.tmpl
+++ b/go/arrow/array/numeric.gen.go.tmpl
@@ -43,6 +43,9 @@ func (a *{{.Name}}) setData(data *Data) {
vals := data.buffers[1]
if vals != nil {
a.values = arrow.{{.Name}}Traits.CastFromBytes(vals.Bytes())
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ a.values = a.values[beg:end]
}
}
-{{end}}
\ No newline at end of file
+{{end}}
diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go
index 9f29394..352ccd1 100644
--- a/go/arrow/array/numeric_test.go
+++ b/go/arrow/array/numeric_test.go
@@ -17,6 +17,7 @@
package array_test
import (
+ "reflect"
"testing"
"github.com/apache/arrow/go/arrow"
@@ -28,9 +29,108 @@ import (
func TestNewFloat64Data(t *testing.T) {
exp := []float64{1.0, 2.0, 4.0, 8.0, 16.0}
- ad := array.NewData(arrow.PrimitiveTypes.Float64, len(exp), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(exp))}, nil, 0)
+ ad := array.NewData(
+ arrow.PrimitiveTypes.Float64, len(exp),
+ []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(exp))},
+ nil, 0, 0,
+ )
fa := array.NewFloat64Data(ad)
assert.Equal(t, len(exp), fa.Len(), "unexpected Len()")
assert.Equal(t, exp, fa.Float64Values(), "unexpected Float64Values()")
}
+
+func TestFloat64SliceData(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ const (
+ beg = 2
+ end = 4
+ )
+
+ var (
+ vs = []float64{1, 2, 3, 4, 5}
+ sub = vs[beg:end]
+ )
+
+ b := array.NewFloat64Builder(pool)
+ defer b.Release()
+
+ for _, v := range vs {
+ b.Append(v)
+ }
+
+ arr := b.NewArray().(*array.Float64)
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(vs); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ slice := array.NewSlice(arr, beg, end).(*array.Float64)
+ defer slice.Release()
+
+ if got, want := slice.Len(), len(sub); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+}
+
+func TestFloat64SliceDataWithNull(t *testing.T) {
+ pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer pool.AssertSize(t, 0)
+
+ const (
+ beg = 2
+ end = 5
+ )
+
+ var (
+ valids = []bool{true, true, true, false, true, true}
+ vs = []float64{1, 2, 3, 0, 4, 5}
+ sub = vs[beg:end]
+ )
+
+ b := array.NewFloat64Builder(pool)
+ defer b.Release()
+
+ b.AppendValues(vs, valids)
+
+ arr := b.NewArray().(*array.Float64)
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(valids); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := arr.NullN(), 1; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ slice := array.NewSlice(arr, beg, end).(*array.Float64)
+ defer slice.Release()
+
+ if got, want := slice.NullN(), 1; got != want {
+ t.Errorf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := slice.Len(), len(sub); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+}
diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go
index c4d7cf4..640f556 100644
--- a/go/arrow/array/numericbuilder.gen.go
+++ b/go/arrow/array/numericbuilder.gen.go
@@ -148,7 +148,7 @@ func (b *Int64Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Int64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Int64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -281,7 +281,7 @@ func (b *Uint64Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Uint64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Uint64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -414,7 +414,7 @@ func (b *Float64Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Float64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Float64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -547,7 +547,7 @@ func (b *Int32Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Int32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Int32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -680,7 +680,7 @@ func (b *Uint32Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Uint32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Uint32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -813,7 +813,7 @@ func (b *Float32Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Float32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Float32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -946,7 +946,7 @@ func (b *Int16Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Int16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Int16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -1079,7 +1079,7 @@ func (b *Uint16Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Uint16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Uint16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -1212,7 +1212,7 @@ func (b *Int8Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Int8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Int8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -1345,7 +1345,7 @@ func (b *Uint8Builder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(arrow.PrimitiveTypes.Uint8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.Uint8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
@@ -1479,7 +1479,7 @@ func (b *TimestampBuilder) newData() (data *Data) {
// trim buffers
b.data.Resize(bytesRequired)
}
- data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
b.reset()
if b.data != nil {
diff --git a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl
index 9705a5c..8fd4d7a 100644
--- a/go/arrow/array/numericbuilder.gen.go.tmpl
+++ b/go/arrow/array/numericbuilder.gen.go.tmpl
@@ -155,9 +155,9 @@ func (b *{{.Name}}Builder) newData() (data *Data) {
b.data.Resize(bytesRequired)
}
{{if .Opt.Parametric -}}
- data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
{{else -}}
- data = NewData(arrow.PrimitiveTypes.{{.Name}}, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls)
+ data = NewData(arrow.PrimitiveTypes.{{.Name}}, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0)
{{end -}}
b.reset()
diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go
index 9e0c1aa..4797f18 100644
--- a/go/arrow/array/struct.go
+++ b/go/arrow/array/struct.go
@@ -189,6 +189,7 @@ func (b *StructBuilder) newData() (data *Data) {
},
fields,
b.nulls,
+ 0,
)
b.reset()
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 20912e7..ff77c6b 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -288,3 +288,35 @@ func Example_structArray() {
// Struct[2] = (null)
// Struct[3] = [[m, a, r, k], 4]
}
+
+// This example shows how one can slice an array.
+// The initial (float64) array is:
+// [1, 2, 3, (null), 4, 5]
+//
+// and the sub-slice is:
+// [3, (null), 4]
+func Example_float64Slice() {
+ pool := memory.NewGoAllocator()
+
+ b := array.NewFloat64Builder(pool)
+ defer b.Release()
+
+ b.AppendValues(
+ []float64{1, 2, 3, -1, 4, 5},
+ []bool{true, true, true, false, true, true},
+ )
+
+ arr := b.NewFloat64Array()
+ defer arr.Release()
+
+ fmt.Printf("array = %v\n", arr.Float64Values())
+
+ sli := array.NewSlice(arr, 2, 5).(*array.Float64)
+ defer sli.Release()
+
+ fmt.Printf("slice = %v\n", sli.Float64Values())
+
+ // Output:
+ // array = [1 2 3 -1 4 5]
+ // slice = [3 -1 4]
+}
diff --git a/go/arrow/internal/bitutil/bitutil.go b/go/arrow/internal/bitutil/bitutil.go
index 00f1693..ef25a99 100644
--- a/go/arrow/internal/bitutil/bitutil.go
+++ b/go/arrow/internal/bitutil/bitutil.go
@@ -55,7 +55,11 @@ func SetBitTo(buf []byte, i int, val bool) {
}
// CountSetBits counts the number of 1's in buf up to n bits.
-func CountSetBits(buf []byte, n int) int {
+func CountSetBits(buf []byte, offset, n int) int {
+ if offset > 0 {
+ return countSetBitsWithOffset(buf, offset, n)
+ }
+
count := 0
uint64Bytes := n / uint64SizeBits * 8
@@ -77,6 +81,55 @@ func CountSetBits(buf []byte, n int) int {
return count
}
+func countSetBitsWithOffset(buf []byte, offset, n int) int {
+ count := 0
+
+ beg := offset
+ end := offset + n
+
+ begU8 := roundUp(beg, uint64SizeBits)
+
+ init := min(n, begU8-beg)
+ for i := offset; i < beg+init; i++ {
+ if BitIsSet(buf, i) {
+ count++
+ }
+ }
+
+ nU64 := (n - init) / uint64SizeBits
+ begU64 := begU8 / uint64SizeBits
+ endU64 := begU64 + nU64
+ bufU64 := bytesToUint64(buf)
+ if begU64 < len(bufU64) {
+ for _, v := range bufU64[begU64:endU64] {
+ count += bits.OnesCount64(v)
+ }
+ }
+
+ // FIXME: use a fallback to bits.OnesCount8
+ // before counting the tail bits.
+
+ tail := beg + init + nU64*uint64SizeBits
+ for i := tail; i < end; i++ {
+ if BitIsSet(buf, i) {
+ count++
+ }
+ }
+
+ return count
+}
+
+func roundUp(v, f int) int {
+ return (v + (f - 1)) / f * f
+}
+
+func min(a, b int) int {
+ if a < b {
+ return a
+ }
+ return b
+}
+
const (
uint64SizeBytes = int(unsafe.Sizeof(uint64(0)))
uint64SizeBits = uint64SizeBytes * 8
diff --git a/go/arrow/internal/bitutil/bitutil_test.go b/go/arrow/internal/bitutil/bitutil_test.go
index b1890ec..108db9a 100644
--- a/go/arrow/internal/bitutil/bitutil_test.go
+++ b/go/arrow/internal/bitutil/bitutil_test.go
@@ -17,6 +17,7 @@
package bitutil_test
import (
+ "math/rand"
"testing"
"github.com/apache/arrow/go/arrow/internal/bitutil"
@@ -99,27 +100,73 @@ func TestCountSetBits(t *testing.T) {
tests := []struct {
name string
buf []byte
+ off int
n int
exp int
}{
- {"some 03 bits", bbits(0x11000000), 3, 2},
- {"some 11 bits", bbits(0x11000011, 0x01000000), 11, 5},
- {"some 72 bits", bbits(0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x10001001), 9 * 8, 35},
- {"all 03 bits", bbits(0x11100001), 3, 3},
- {"all 11 bits", bbits(0x11111111, 0x11111111), 11, 11},
- {"all 72 bits", bbits(0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111), 9 * 8, 72},
- {"none 03 bits", bbits(0x00000001), 3, 0},
- {"none 11 bits", bbits(0x00000000, 0x00000000), 11, 0},
- {"none 72 bits", bbits(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), 9 * 8, 0},
+ {"some 03 bits", bbits(0x11000000), 0, 3, 2},
+ {"some 11 bits", bbits(0x11000011, 0x01000000), 0, 11, 5},
+ {"some 72 bits", bbits(0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x10001001), 0, 9 * 8, 35},
+ {"all 08 bits", bbits(0x11111110), 0, 8, 7},
+ {"all 03 bits", bbits(0x11100001), 0, 3, 3},
+ {"all 11 bits", bbits(0x11111111, 0x11111111), 0, 11, 11},
+ {"all 72 bits", bbits(0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111), 0, 9 * 8, 72},
+ {"none 03 bits", bbits(0x00000001), 0, 3, 0},
+ {"none 11 bits", bbits(0x00000000, 0x00000000), 0, 11, 0},
+ {"none 72 bits", bbits(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 9 * 8, 0},
+
+ {"some 03 bits - offset+1", bbits(0x11000000), 1, 3, 1},
+ {"some 03 bits - offset+2", bbits(0x11000000), 2, 3, 0},
+ {"some 11 bits - offset+1", bbits(0x11000011, 0x01000000, 0x00000000), 1, 11, 4},
+ {"some 11 bits - offset+2", bbits(0x11000011, 0x01000000, 0x00000000), 2, 11, 3},
+ {"some 11 bits - offset+3", bbits(0x11000011, 0x01000000, 0x00000000), 3, 11, 3},
+ {"some 11 bits - offset+6", bbits(0x11000011, 0x01000000, 0x00000000), 6, 11, 3},
+ {"some 11 bits - offset+7", bbits(0x11000011, 0x01000000, 0x00000000), 7, 11, 2},
+ {"some 11 bits - offset+8", bbits(0x11000011, 0x01000000, 0x00000000), 8, 11, 1},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- got := bitutil.CountSetBits(test.buf, test.n)
+ got := bitutil.CountSetBits(test.buf, test.off, test.n)
assert.Equal(t, test.exp, got)
})
}
}
+func TestCountSetBitsOffset(t *testing.T) {
+ slowCountSetBits := func(buf []byte, offset, n int) int {
+ count := 0
+ for i := offset; i < offset+n; i++ {
+ if bitutil.BitIsSet(buf, i) {
+ count++
+ }
+ }
+ return count
+ }
+
+ const (
+ bufSize = 1000
+ nbits = bufSize * 8
+ )
+
+ offsets := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 32, 37, 63, 64, 128, nbits - 30, nbits - 64}
+
+ buf := make([]byte, bufSize)
+
+ rng := rand.New(rand.NewSource(0))
+ _, err := rng.Read(buf)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ for i, offset := range offsets {
+ want := slowCountSetBits(buf, offset, nbits-offset)
+ got := bitutil.CountSetBits(buf, offset, nbits-offset)
+ if got != want {
+ t.Errorf("offset[%2d/%2d]=%5d. got=%5d, want=%5d", i+1, len(offsets), offset, got, want)
+ }
+ }
+}
+
func bbits(v ...int32) []byte {
return tools.IntsToBitsLSB(v...)
}
@@ -153,7 +200,7 @@ var (
intval int
)
-func benchmarkCountSetBitsN(b *testing.B, n int) {
+func benchmarkCountSetBitsN(b *testing.B, offset, n int) {
nn := n/8 + 1
buf := make([]byte, nn)
//src := [4]byte{0x1f, 0xaa, 0xba, 0x11}
@@ -164,27 +211,47 @@ func benchmarkCountSetBitsN(b *testing.B, n int) {
b.ResetTimer()
var res int
for i := 0; i < b.N; i++ {
- res = bitutil.CountSetBits(buf, n)
+ res = bitutil.CountSetBits(buf, offset, n-offset)
}
intval = res
}
func BenchmarkCountSetBits_3(b *testing.B) {
- benchmarkCountSetBitsN(b, 3)
+ benchmarkCountSetBitsN(b, 0, 3)
}
func BenchmarkCountSetBits_32(b *testing.B) {
- benchmarkCountSetBitsN(b, 32)
+ benchmarkCountSetBitsN(b, 0, 32)
}
func BenchmarkCountSetBits_128(b *testing.B) {
- benchmarkCountSetBitsN(b, 128)
+ benchmarkCountSetBitsN(b, 0, 128)
}
func BenchmarkCountSetBits_1000(b *testing.B) {
- benchmarkCountSetBitsN(b, 1000)
+ benchmarkCountSetBitsN(b, 0, 1000)
}
func BenchmarkCountSetBits_1024(b *testing.B) {
- benchmarkCountSetBitsN(b, 1024)
+ benchmarkCountSetBitsN(b, 0, 1024)
+}
+
+func BenchmarkCountSetBitsOffset_3(b *testing.B) {
+ benchmarkCountSetBitsN(b, 1, 3)
+}
+
+func BenchmarkCountSetBitsOffset_32(b *testing.B) {
+ benchmarkCountSetBitsN(b, 1, 32)
+}
+
+func BenchmarkCountSetBitsOffset_128(b *testing.B) {
+ benchmarkCountSetBitsN(b, 1, 128)
+}
+
+func BenchmarkCountSetBitsOffset_1000(b *testing.B) {
+ benchmarkCountSetBitsN(b, 1, 1000)
+}
+
+func BenchmarkCountSetBitsOffset_1024(b *testing.B) {
+ benchmarkCountSetBitsN(b, 1, 1024)
}