You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/01/05 20:29:16 UTC

[arrow] branch master updated: ARROW-4130: [Go] offset not used when accessing binary array

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 91a72ac  ARROW-4130: [Go] offset not used when accessing binary array
91a72ac is described below

commit 91a72ac7e1361b78167a703ecd5dddb85b621159
Author: jlapacik <jo...@pacbell.net>
AuthorDate: Sat Jan 5 14:19:52 2019 -0600

    ARROW-4130: [Go] offset not used when accessing binary array
    
    Closes https://github.com/apache/arrow/issues/3270 .
    
    Author: jlapacik <jo...@pacbell.net>
    
    Closes #3283 from jlapacik/fix/go-binary-slice and squashes the following commits:
    
    5cf6a4f03 <jlapacik> assign slice value in out of bounds tests
    0666c0ed4 <jlapacik> allocate new slice for each test case
    9b5a00057 <jlapacik> remove single letter variable
    b46f8412d <jlapacik> ARROW-4130:  offset not used when accessing binary array
---
 go/arrow/array/binary.go      |  38 ++++-
 go/arrow/array/binary_test.go | 343 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 376 insertions(+), 5 deletions(-)

diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go
index 0b89b7e..850fb09 100644
--- a/go/arrow/array/binary.go
+++ b/go/arrow/array/binary.go
@@ -38,7 +38,13 @@ func NewBinaryData(data *Data) *Binary {
 }
 
 // Value returns the slice at index i. This value should not be mutated.
-func (a *Binary) Value(i int) []byte { return a.valueBytes[a.valueOffsets[i]:a.valueOffsets[i+1]] }
+func (a *Binary) Value(i int) []byte {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	idx := a.array.data.offset + i
+	return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]]
+}
 
 // ValueString returns the string at index i without performing additional allocations.
 // The string is only valid for the lifetime of the Binary array.
@@ -47,10 +53,32 @@ func (a *Binary) ValueString(i int) string {
 	return *(*string)(unsafe.Pointer(&b))
 }
 
-func (a *Binary) ValueOffset(i int) int { return int(a.valueOffsets[i]) }
-func (a *Binary) ValueLen(i int) int    { return int(a.valueOffsets[i+1] - a.valueOffsets[i]) }
-func (a *Binary) ValueOffsets() []int32 { return a.valueOffsets }
-func (a *Binary) ValueBytes() []byte    { return a.valueBytes }
+func (a *Binary) ValueOffset(i int) int {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	return int(a.valueOffsets[a.array.data.offset+i])
+}
+
+func (a *Binary) ValueLen(i int) int {
+	if i < 0 || i >= a.array.data.length {
+		panic("arrow/array: index out of range")
+	}
+	beg := a.array.data.offset + i
+	return int(a.valueOffsets[beg+1] - a.valueOffsets[beg])
+}
+
+func (a *Binary) ValueOffsets() []int32 {
+	beg := a.array.data.offset
+	end := beg + a.array.data.length + 1
+	return a.valueOffsets[beg:end]
+}
+
+func (a *Binary) ValueBytes() []byte {
+	beg := a.array.data.offset
+	end := beg + a.array.data.length
+	return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]]
+}
 
 func (a *Binary) setData(data *Data) {
 	if len(data.buffers) != 3 {
diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go
index 87d1b58..2af45de 100644
--- a/go/arrow/array/binary_test.go
+++ b/go/arrow/array/binary_test.go
@@ -17,6 +17,7 @@
 package array
 
 import (
+	"reflect"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -62,3 +63,345 @@ func TestBinary(t *testing.T) {
 
 	b.Release()
 }
+
+func TestBinarySliceData(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	for _, v := range values {
+		b.AppendString(v)
+	}
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	if got, want := arr.Len(), len(values); got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	vs := make([]string, arr.Len())
+
+	for i := range vs {
+		vs[i] = arr.ValueString(i)
+	}
+
+	if got, want := vs, values; !reflect.DeepEqual(got, want) {
+		t.Fatalf("got=%v, want=%v", got, want)
+	}
+
+	tests := []struct {
+		interval [2]int64
+		want     []string
+	}{
+		{
+			interval: [2]int64{0, 0},
+			want:     []string{},
+		},
+		{
+			interval: [2]int64{0, 5},
+			want:     []string{"a", "bc", "def", "g", "hijk"},
+		},
+		{
+			interval: [2]int64{0, 10},
+			want:     []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"},
+		},
+		{
+			interval: [2]int64{5, 10},
+			want:     []string{"lm", "n", "opq", "rs", "tu"},
+		},
+		{
+			interval: [2]int64{10, 10},
+			want:     []string{},
+		},
+		{
+			interval: [2]int64{2, 7},
+			want:     []string{"def", "g", "hijk", "lm", "n"},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run("", func(t *testing.T) {
+
+			slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary)
+			defer slice.Release()
+
+			if got, want := slice.Len(), len(tc.want); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			vs := make([]string, slice.Len())
+
+			for i := range vs {
+				vs[i] = slice.ValueString(i)
+			}
+
+			if got, want := vs, tc.want; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+		})
+	}
+}
+
+func TestBinarySliceDataWithNull(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+	valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	b.AppendStringValues(values, valids)
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	if got, want := arr.Len(), len(values); got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	if got, want := arr.NullN(), 3; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	vs := make([]string, arr.Len())
+
+	for i := range vs {
+		vs[i] = arr.ValueString(i)
+	}
+
+	if got, want := vs, values; !reflect.DeepEqual(got, want) {
+		t.Fatalf("got=%v, want=%v", got, want)
+	}
+
+	tests := []struct {
+		interval [2]int64
+		nulls    int
+		want     []string
+	}{
+		{
+			interval: [2]int64{0, 2},
+			nulls:    0,
+			want:     []string{"a", "bc"},
+		},
+		{
+			interval: [2]int64{0, 3},
+			nulls:    1,
+			want:     []string{"a", "bc", ""},
+		},
+		{
+			interval: [2]int64{0, 4},
+			nulls:    2,
+			want:     []string{"a", "bc", "", ""},
+		},
+		{
+			interval: [2]int64{4, 8},
+			nulls:    0,
+			want:     []string{"hijk", "lm", "", "opq"},
+		},
+		{
+			interval: [2]int64{2, 9},
+			nulls:    3,
+			want:     []string{"", "", "hijk", "lm", "", "opq", ""},
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run("", func(t *testing.T) {
+
+			slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary)
+			defer slice.Release()
+
+			if got, want := slice.Len(), len(tc.want); got != want {
+				t.Fatalf("got=%d, want=%d", got, want)
+			}
+
+			if got, want := slice.NullN(), tc.nulls; got != want {
+				t.Errorf("got=%d, want=%d", got, want)
+			}
+
+			vs := make([]string, slice.Len())
+
+			for i := range vs {
+				vs[i] = slice.ValueString(i)
+			}
+
+			if got, want := vs, tc.want; !reflect.DeepEqual(got, want) {
+				t.Fatalf("got=%v, want=%v", got, want)
+			}
+		})
+	}
+}
+
+func TestBinarySliceOutOfBounds(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	for _, v := range values {
+		b.AppendString(v)
+	}
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	slice := NewSlice(arr, 3, 8).(*Binary)
+	defer slice.Release()
+
+	tests := []struct {
+		index int
+		panic bool
+	}{
+		{
+			index: -1,
+			panic: true,
+		},
+		{
+			index: 5,
+			panic: true,
+		},
+		{
+			index: 0,
+			panic: false,
+		},
+		{
+			index: 4,
+			panic: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run("", func(t *testing.T) {
+
+			var val string
+
+			if tc.panic {
+				defer func() {
+					e := recover()
+					if e == nil {
+						t.Fatalf("this should have panicked, but did not; slice value %q", val)
+					}
+					if got, want := e.(string), "arrow/array: index out of range"; got != want {
+						t.Fatalf("invalid error. got=%q, want=%q", got, want)
+					}
+				}()
+			} else {
+				defer func() {
+					if e := recover(); e != nil {
+						t.Fatalf("unexpected panic: %v", e)
+					}
+				}()
+			}
+
+			val = slice.ValueString(tc.index)
+		})
+	}
+}
+
+func TestBinaryValueOffset(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+	valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	b.AppendStringValues(values, valids)
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	slice := NewSlice(arr, 2, 9).(*Binary)
+	defer slice.Release()
+
+	offset := 3
+	vs := values[2:9]
+
+	for i, v := range vs {
+		assert.Equal(t, offset, slice.ValueOffset(i))
+		offset += len(v)
+	}
+}
+
+func TestBinaryValueLen(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+	valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	b.AppendStringValues(values, valids)
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	slice := NewSlice(arr, 2, 9).(*Binary)
+	defer slice.Release()
+
+	vs := values[2:9]
+
+	for i, v := range vs {
+		assert.Equal(t, len(v), slice.ValueLen(i))
+	}
+}
+
+func TestBinaryValueOffsets(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+	valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	b.AppendStringValues(values, valids)
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets())
+
+	slice := NewSlice(arr, 2, 9).(*Binary)
+	defer slice.Release()
+
+	assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets())
+}
+
+func TestBinaryValueBytes(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"}
+	valids := []bool{true, true, false, false, true, true, true, true, false, true}
+
+	b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+	defer b.Release()
+
+	b.AppendStringValues(values, valids)
+
+	arr := b.NewArray().(*Binary)
+	defer arr.Release()
+
+	assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes())
+
+	slice := NewSlice(arr, 2, 9).(*Binary)
+	defer slice.Release()
+
+	assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes())
+}