You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/30 19:16:40 UTC
[arrow] branch master updated: ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file
This is an automated email from the ASF dual-hosted git repository.
sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a54b510 ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file
a54b510 is described below
commit a54b5100e912ec22f3902dd1a5ab41cc0328c35d
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Tue Apr 30 21:16:26 2019 +0200
ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file
Author: Sebastien Binet <bi...@cern.ch>
Closes #4159 from sbinet/issue-5172 and squashes the following commits:
6293fb0a <Sebastien Binet> arrow/array: use FixedWidthDataType interface
bba8571b <Sebastien Binet> ARROW-5172: implement reading fixed-size binary arrays from Arrow file
---
go/arrow/array/array_test.go | 1 +
go/arrow/array/fixedsize_binary.go | 41 ++++++++++++++++++++++++--
go/arrow/array/fixedsize_binarybuilder_test.go | 1 +
go/arrow/ipc/file_reader.go | 13 ++++++++
4 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index eb97d9e..014a79a 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -32,6 +32,7 @@ type testDataType struct {
func (d *testDataType) ID() arrow.Type { return d.id }
func (d *testDataType) Name() string { panic("implement me") }
+func (d *testDataType) BitWidth() int { return 8 }
func TestMakeFromData(t *testing.T) {
tests := []struct {
diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go
index 258f2d2..78925bc 100644
--- a/go/arrow/array/fixedsize_binary.go
+++ b/go/arrow/array/fixedsize_binary.go
@@ -16,7 +16,12 @@
package array
-import "github.com/apache/arrow/go/arrow"
+import (
+ "fmt"
+ "strings"
+
+ "github.com/apache/arrow/go/arrow"
+)
// A type which represents an immutable sequence of fixed-length binary strings.
type FixedSizeBinary struct {
@@ -43,6 +48,24 @@ func (a *FixedSizeBinary) ValueLen(i int) int { return int(a.valueOffsets[i+1
func (a *FixedSizeBinary) ValueOffsets() []int32 { return a.valueOffsets }
func (a *FixedSizeBinary) ValueBytes() []byte { return a.valueBytes }
+func (a *FixedSizeBinary) String() string {
+ o := new(strings.Builder)
+ o.WriteString("[")
+ for i := 0; i < a.Len(); i++ {
+ if i > 0 {
+ o.WriteString(" ")
+ }
+ switch {
+ case a.IsNull(i):
+ o.WriteString("(null)")
+ default:
+ fmt.Fprintf(o, "%q", a.Value(i))
+ }
+ }
+ o.WriteString("]")
+ return o.String()
+}
+
func (a *FixedSizeBinary) setData(data *Data) {
if len(data.buffers) != 3 {
panic("len(data.buffers) != 3")
@@ -53,7 +76,21 @@ func (a *FixedSizeBinary) setData(data *Data) {
if valueBytes := data.buffers[2]; valueBytes != nil {
a.valueBytes = valueBytes.Bytes()
}
- if valueOffsets := data.buffers[1]; valueOffsets != nil {
+
+ switch valueOffsets := data.buffers[1]; valueOffsets {
+ case nil:
+ // re-compute offsets
+ offsets := make([]int32, a.Len()+1)
+ bw := a.DataType().(arrow.FixedWidthDataType).BitWidth() / 8
+ for i := range offsets[1:] {
+ var delta int32
+ if a.IsValid(i) {
+ delta = int32(bw)
+ }
+ offsets[i+1] = offsets[i] + delta
+ }
+ a.valueOffsets = offsets
+ default:
a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes())
}
}
diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go
index bd0bb5f..f50e1b0 100644
--- a/go/arrow/array/fixedsize_binarybuilder_test.go
+++ b/go/arrow/array/fixedsize_binarybuilder_test.go
@@ -64,6 +64,7 @@ func TestFixedSizeBinaryBuilder(t *testing.T) {
assert.Zero(t, b.Len(), "unexpected ArrayBuilder.Len(), NewFixedSizeBinaryArray did not reset state")
assert.Zero(t, b.Cap(), "unexpected ArrayBuilder.Cap(), NewFixedSizeBinaryArray did not reset state")
assert.Zero(t, b.NullN(), "unexpected ArrayBuilder.NullN(), NewFixedSizeBinaryArray did not reset state")
+ assert.Equal(t, a.String(), `["1234567" (null) "ABCDEFG" (null) "7654321" (null) "AZERTYU"]`)
b.Release()
a.Release()
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 26e48fa..8fa3009 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -364,6 +364,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
case *arrow.BinaryType, *arrow.StringType:
return ctx.loadBinary(dt)
+ case *arrow.FixedSizeBinaryType:
+ return ctx.loadFixedSizeBinary(dt)
+
case *arrow.ListType:
return ctx.loadList(dt)
@@ -438,6 +441,16 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) array.Interface {
return array.MakeFromData(data)
}
+func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) array.Interface {
+ field, buffers := ctx.loadCommon(2)
+ buffers = append(buffers, nil, ctx.buffer())
+
+ data := array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0)
+ defer data.Release()
+
+ return array.MakeFromData(data)
+}
+
func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
field, buffers := ctx.loadCommon(2)
buffers = append(buffers, ctx.buffer())