You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/01/05 20:29:16 UTC
[arrow] branch master updated: ARROW-4130: [Go] offset not used
when accessing binary array
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 91a72ac ARROW-4130: [Go] offset not used when accessing binary array
91a72ac is described below
commit 91a72ac7e1361b78167a703ecd5dddb85b621159
Author: jlapacik <jo...@pacbell.net>
AuthorDate: Sat Jan 5 14:19:52 2019 -0600
ARROW-4130: [Go] offset not used when accessing binary array
Closes https://github.com/apache/arrow/issues/3270 .
Author: jlapacik <jo...@pacbell.net>
Closes #3283 from jlapacik/fix/go-binary-slice and squashes the following commits:
5cf6a4f03 <jlapacik> assign slice value in out of bounds tests
0666c0ed4 <jlapacik> allocate new slice for each test case
9b5a00057 <jlapacik> remove single letter variable
b46f8412d <jlapacik> ARROW-4130: offset not used when accessing binary array
---
go/arrow/array/binary.go | 38 ++++-
go/arrow/array/binary_test.go | 343 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 376 insertions(+), 5 deletions(-)
diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go
index 0b89b7e..850fb09 100644
--- a/go/arrow/array/binary.go
+++ b/go/arrow/array/binary.go
@@ -38,7 +38,13 @@ func NewBinaryData(data *Data) *Binary {
}
// Value returns the slice at index i. This value should not be mutated.
-func (a *Binary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[i]:a.valueOffsets[i+1]] }
+func (a *Binary) Value(i int) []byte {
+ if i < 0 || i >= a.array.data.length {
+ panic("arrow/array: index out of range")
+ }
+ idx := a.array.data.offset + i
+ return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]]
+}
// ValueString returns the string at index i without performing additional allocations.
// The string is only valid for the lifetime of the Binary array.
@@ -47,10 +53,32 @@ func (a *Binary) ValueString(i int) string {
return *(*string)(unsafe.Pointer(&b))
}
-func (a *Binary) ValueOffset(i int) int { return int(a.valueOffsets[i]) }
-func (a *Binary) ValueLen(i int) int { return int(a.valueOffsets[i+1] - a.valueOffsets[i]) }
-func (a *Binary) ValueOffsets() []int32 { return a.valueOffsets }
-func (a *Binary) ValueBytes() []byte { return a.valueBytes }
+func (a *Binary) ValueOffset(i int) int {
+ if i < 0 || i >= a.array.data.length {
+ panic("arrow/array: index out of range")
+ }
+ return int(a.valueOffsets[a.array.data.offset+i])
+}
+
+func (a *Binary) ValueLen(i int) int {
+ if i < 0 || i >= a.array.data.length {
+ panic("arrow/array: index out of range")
+ }
+ beg := a.array.data.offset + i
+ return int(a.valueOffsets[beg+1] - a.valueOffsets[beg])
+}
+
+func (a *Binary) ValueOffsets() []int32 {
+ beg := a.array.data.offset
+ end := beg + a.array.data.length + 1
+ return a.valueOffsets[beg:end]
+}
+
+func (a *Binary) ValueBytes() []byte {
+ beg := a.array.data.offset
+ end := beg + a.array.data.length
+ return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]]
+}
func (a *Binary) setData(data *Data) {
if len(data.buffers) != 3 {
diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go
index 87d1b58..2af45de 100644
--- a/go/arrow/array/binary_test.go
+++ b/go/arrow/array/binary_test.go
@@ -17,6 +17,7 @@
package array
import (
+ "reflect"
"testing"
"github.com/stretchr/testify/assert"
@@ -62,3 +63,345 @@ func TestBinary(t *testing.T) {
b.Release()
}
+
+func TestBinarySliceData(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ for _, v := range values {
+ b.AppendString(v)
+ }
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(values); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ vs := make([]string, arr.Len())
+
+ for i := range vs {
+ vs[i] = arr.ValueString(i)
+ }
+
+ if got, want := vs, values; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ tests := []struct {
+ interval [2]int64
+ want []string
+ }{
+ {
+ interval: [2]int64{0, 0},
+ want: []string{},
+ },
+ {
+ interval: [2]int64{0, 5},
+ want: []string{"a", "bc", "def", "g", "hijk"},
+ },
+ {
+ interval: [2]int64{0, 10},
+ want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"},
+ },
+ {
+ interval: [2]int64{5, 10},
+ want: []string{"lm", "n", "opq", "rs", "tu"},
+ },
+ {
+ interval: [2]int64{10, 10},
+ want: []string{},
+ },
+ {
+ interval: [2]int64{2, 7},
+ want: []string{"def", "g", "hijk", "lm", "n"},
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+
+ slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary)
+ defer slice.Release()
+
+ if got, want := slice.Len(), len(tc.want); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ vs := make([]string, slice.Len())
+
+ for i := range vs {
+ vs[i] = slice.ValueString(i)
+ }
+
+ if got, want := vs, tc.want; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ })
+ }
+}
+
+func TestBinarySliceDataWithNull(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+ valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ b.AppendStringValues(values, valids)
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ if got, want := arr.Len(), len(values); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := arr.NullN(), 3; got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ vs := make([]string, arr.Len())
+
+ for i := range vs {
+ vs[i] = arr.ValueString(i)
+ }
+
+ if got, want := vs, values; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+
+ tests := []struct {
+ interval [2]int64
+ nulls int
+ want []string
+ }{
+ {
+ interval: [2]int64{0, 2},
+ nulls: 0,
+ want: []string{"a", "bc"},
+ },
+ {
+ interval: [2]int64{0, 3},
+ nulls: 1,
+ want: []string{"a", "bc", ""},
+ },
+ {
+ interval: [2]int64{0, 4},
+ nulls: 2,
+ want: []string{"a", "bc", "", ""},
+ },
+ {
+ interval: [2]int64{4, 8},
+ nulls: 0,
+ want: []string{"hijk", "lm", "", "opq"},
+ },
+ {
+ interval: [2]int64{2, 9},
+ nulls: 3,
+ want: []string{"", "", "hijk", "lm", "", "opq", ""},
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+
+ slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary)
+ defer slice.Release()
+
+ if got, want := slice.Len(), len(tc.want); got != want {
+ t.Fatalf("got=%d, want=%d", got, want)
+ }
+
+ if got, want := slice.NullN(), tc.nulls; got != want {
+ t.Errorf("got=%d, want=%d", got, want)
+ }
+
+ vs := make([]string, slice.Len())
+
+ for i := range vs {
+ vs[i] = slice.ValueString(i)
+ }
+
+ if got, want := vs, tc.want; !reflect.DeepEqual(got, want) {
+ t.Fatalf("got=%v, want=%v", got, want)
+ }
+ })
+ }
+}
+
+func TestBinarySliceOutOfBounds(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ for _, v := range values {
+ b.AppendString(v)
+ }
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ slice := NewSlice(arr, 3, 8).(*Binary)
+ defer slice.Release()
+
+ tests := []struct {
+ index int
+ panic bool
+ }{
+ {
+ index: -1,
+ panic: true,
+ },
+ {
+ index: 5,
+ panic: true,
+ },
+ {
+ index: 0,
+ panic: false,
+ },
+ {
+ index: 4,
+ panic: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+
+ var val string
+
+ if tc.panic {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("this should have panicked, but did not; slice value %q", val)
+ }
+ if got, want := e.(string), "arrow/array: index out of range"; got != want {
+ t.Fatalf("invalid error. got=%q, want=%q", got, want)
+ }
+ }()
+ } else {
+ defer func() {
+ if e := recover(); e != nil {
+ t.Fatalf("unexpected panic: %v", e)
+ }
+ }()
+ }
+
+ val = slice.ValueString(tc.index)
+ })
+ }
+}
+
+func TestBinaryValueOffset(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+ valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ b.AppendStringValues(values, valids)
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ slice := NewSlice(arr, 2, 9).(*Binary)
+ defer slice.Release()
+
+ offset := 3
+ vs := values[2:9]
+
+ for i, v := range vs {
+ assert.Equal(t, offset, slice.ValueOffset(i))
+ offset += len(v)
+ }
+}
+
+func TestBinaryValueLen(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+ valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ b.AppendStringValues(values, valids)
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ slice := NewSlice(arr, 2, 9).(*Binary)
+ defer slice.Release()
+
+ vs := values[2:9]
+
+ for i, v := range vs {
+ assert.Equal(t, len(v), slice.ValueLen(i))
+ }
+}
+
+func TestBinaryValueOffsets(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+ valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ b.AppendStringValues(values, valids)
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets())
+
+ slice := NewSlice(arr, 2, 9).(*Binary)
+ defer slice.Release()
+
+ assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets())
+}
+
+func TestBinaryValueBytes(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+ valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+ b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer b.Release()
+
+ b.AppendStringValues(values, valids)
+
+ arr := b.NewArray().(*Binary)
+ defer arr.Release()
+
+ assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes())
+
+ slice := NewSlice(arr, 2, 9).(*Binary)
+ defer slice.Release()
+
+ assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes())
+}