You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/30 19:16:40 UTC

[arrow] branch master updated: ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file

This is an automated email from the ASF dual-hosted git repository.

sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a54b510  ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file
a54b510 is described below

commit a54b5100e912ec22f3902dd1a5ab41cc0328c35d
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Tue Apr 30 21:16:26 2019 +0200

    ARROW-5172: [Go] implement reading fixed-size binary arrays from Arrow file
    
    Author: Sebastien Binet <bi...@cern.ch>
    
    Closes #4159 from sbinet/issue-5172 and squashes the following commits:
    
    6293fb0a <Sebastien Binet> arrow/array: use FixedWidthDataType interface
    bba8571b <Sebastien Binet> ARROW-5172:  implement reading fixed-size binary arrays from Arrow file
---
 go/arrow/array/array_test.go                   |  1 +
 go/arrow/array/fixedsize_binary.go             | 41 ++++++++++++++++++++++++--
 go/arrow/array/fixedsize_binarybuilder_test.go |  1 +
 go/arrow/ipc/file_reader.go                    | 13 ++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index eb97d9e..014a79a 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -32,6 +32,7 @@ type testDataType struct {
 
 func (d *testDataType) ID() arrow.Type { return d.id }
 func (d *testDataType) Name() string   { panic("implement me") }
+func (d *testDataType) BitWidth() int  { return 8 } // always reports 8 bits; presumably added so the stub satisfies arrow.FixedWidthDataType — confirm
 
 func TestMakeFromData(t *testing.T) {
 	tests := []struct {
diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go
index 258f2d2..78925bc 100644
--- a/go/arrow/array/fixedsize_binary.go
+++ b/go/arrow/array/fixedsize_binary.go
@@ -16,7 +16,12 @@
 
 package array
 
-import "github.com/apache/arrow/go/arrow"
+import (
+	"fmt"
+	"strings"
+
+	"github.com/apache/arrow/go/arrow"
+)
 
 // A type which represents an immutable sequence of fixed-length binary strings.
 type FixedSizeBinary struct {
@@ -43,6 +48,24 @@ func (a *FixedSizeBinary) ValueLen(i int) int    { return int(a.valueOffsets[i+1
 func (a *FixedSizeBinary) ValueOffsets() []int32 { return a.valueOffsets }
 func (a *FixedSizeBinary) ValueBytes() []byte    { return a.valueBytes }
 
+func (a *FixedSizeBinary) String() string {
+	o := new(strings.Builder)
+	o.WriteString("[")
+	for i := 0; i < a.Len(); i++ {
+		if i > 0 {
+			o.WriteString(" ")
+		}
+		switch {
+		case a.IsNull(i):
+			o.WriteString("(null)")
+		default:
+			fmt.Fprintf(o, "%q", a.Value(i))
+		}
+	}
+	o.WriteString("]")
+	return o.String()
+}
+
 func (a *FixedSizeBinary) setData(data *Data) {
 	if len(data.buffers) != 3 {
 		panic("len(data.buffers) != 3")
@@ -53,7 +76,21 @@ func (a *FixedSizeBinary) setData(data *Data) {
 	if valueBytes := data.buffers[2]; valueBytes != nil {
 		a.valueBytes = valueBytes.Bytes()
 	}
-	if valueOffsets := data.buffers[1]; valueOffsets != nil {
+
+	switch valueOffsets := data.buffers[1]; valueOffsets {
+	case nil:
+		// re-compute offsets
+		offsets := make([]int32, a.Len()+1)
+		bw := a.DataType().(arrow.FixedWidthDataType).BitWidth() / 8
+		for i := range offsets[1:] {
+			var delta int32
+			if a.IsValid(i) {
+				delta = int32(bw)
+			}
+			offsets[i+1] = offsets[i] + delta
+		}
+		a.valueOffsets = offsets
+	default:
 		a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes())
 	}
 }
diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go
index bd0bb5f..f50e1b0 100644
--- a/go/arrow/array/fixedsize_binarybuilder_test.go
+++ b/go/arrow/array/fixedsize_binarybuilder_test.go
@@ -64,6 +64,7 @@ func TestFixedSizeBinaryBuilder(t *testing.T) {
 	assert.Zero(t, b.Len(), "unexpected ArrayBuilder.Len(), NewFixedSizeBinaryArray did not reset state")
 	assert.Zero(t, b.Cap(), "unexpected ArrayBuilder.Cap(), NewFixedSizeBinaryArray did not reset state")
 	assert.Zero(t, b.NullN(), "unexpected ArrayBuilder.NullN(), NewFixedSizeBinaryArray did not reset state")
+	assert.Equal(t, a.String(), `["1234567" (null) "ABCDEFG" (null) "7654321" (null) "AZERTYU"]`)
 
 	b.Release()
 	a.Release()
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 26e48fa..8fa3009 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -364,6 +364,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 	case *arrow.BinaryType, *arrow.StringType:
 		return ctx.loadBinary(dt)
 
+	case *arrow.FixedSizeBinaryType:
+		return ctx.loadFixedSizeBinary(dt)
+
 	case *arrow.ListType:
 		return ctx.loadList(dt)
 
@@ -438,6 +441,16 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) array.Interface {
 	return array.MakeFromData(data)
 }
 
+// loadFixedSizeBinary loads the common field data, appends a nil placeholder plus the next buffer, and builds the array.
+func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) array.Interface {
+	field, buffers := ctx.loadCommon(2)
+	buffers = append(buffers, nil, ctx.buffer())
+	length, nulls := int(field.Length()), int(field.NullCount())
+	data := array.NewData(dt, length, buffers, nil, nulls, 0)
+	defer data.Release()
+	return array.MakeFromData(data)
+}
+
 func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
 	field, buffers := ctx.loadCommon(2)
 	buffers = append(buffers, ctx.buffer())