You are viewing a plain text version of this content; the link to the canonical version was not preserved in this copy.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/29 08:27:38 UTC

[arrow] branch master updated: ARROW-5110: [Go] implement reading struct arrays from Arrow file

This is an automated email from the ASF dual-hosted git repository.

sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cd4c2ca  ARROW-5110: [Go] implement reading struct arrays from Arrow file
cd4c2ca is described below

commit cd4c2ca6b32f038beb41bd8da9af9a2d8762c60b
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Apr 29 10:27:26 2019 +0200

    ARROW-5110: [Go] implement reading struct arrays from Arrow file
    
    Author: Sebastien Binet <bi...@cern.ch>
    
    Closes #4158 from sbinet/issue-5110 and squashes the following commits:
    
    f6f9b9ea <Sebastien Binet> arrow/array: provide List array stringer implementation
    b838ee04 <Sebastien Binet> arrow/array: fix Struct array Stringer implementation
    d496c2ff <Sebastien Binet> ARROW-5110: implement reading struct arrays from Arrow file
---
 go/arrow/array/list.go        | 23 +++++++++++++++++++
 go/arrow/array/struct.go      | 11 +++------
 go/arrow/array/struct_test.go | 53 +++++++++++++++++++++++++++++++++++++++++++
 go/arrow/example_test.go      |  2 ++
 go/arrow/ipc/file_reader.go   | 24 ++++++++++++++++++++
 go/arrow/ipc/metadata.go      |  3 +++
 6 files changed, 108 insertions(+), 8 deletions(-)

diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go
index 1c4c01f..b571c50 100644
--- a/go/arrow/array/list.go
+++ b/go/arrow/array/list.go
@@ -17,6 +17,8 @@
 package array
 
 import (
+	"fmt"
+	"strings"
 	"sync/atomic"
 
 	"github.com/apache/arrow/go/arrow"
@@ -42,6 +44,27 @@ func NewListData(data *Data) *List {
 
 func (a *List) ListValues() Interface { return a.values }
 
+func (a *List) String() string {
+	o := new(strings.Builder)
+	o.WriteString("[")
+	for i := 0; i < a.Len(); i++ {
+		if i > 0 {
+			o.WriteString(" ")
+		}
+		if !a.IsValid(i) {
+			o.WriteString("(null)")
+			continue
+		}
+		beg := int64(a.offsets[i])
+		end := int64(a.offsets[i+1])
+		sub := NewSlice(a.values, beg, end)
+		fmt.Fprintf(o, "%v", sub)
+		sub.Release()
+	}
+	o.WriteString("]")
+	return o.String()
+}
+
 func (a *List) setData(data *Data) {
 	a.array.setData(data)
 	vals := data.buffers[1]
diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go
index 9a65be0..55fd913 100644
--- a/go/arrow/array/struct.go
+++ b/go/arrow/array/struct.go
@@ -46,19 +46,14 @@ func (a *Struct) Field(i int) Interface { return a.fields[i] }
 
 func (a *Struct) String() string {
 	o := new(strings.Builder)
-	o.WriteString("[")
+	o.WriteString("{")
 	for i, v := range a.fields {
 		if i > 0 {
 			o.WriteString(" ")
 		}
-		switch {
-		case a.IsNull(i):
-			o.WriteString("(null)")
-		default:
-			fmt.Fprintf(o, "%v", v)
-		}
+		fmt.Fprintf(o, "%v", v)
 	}
-	o.WriteString("]")
+	o.WriteString("}")
 	return o.String()
 }
 
diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go
index ba21f20..d9701ce 100644
--- a/go/arrow/array/struct_test.go
+++ b/go/arrow/array/struct_test.go
@@ -247,3 +247,56 @@ func TestStructArrayBulkAppend(t *testing.T) {
 		}
 	}
 }
+
+func TestStructArrayStringer(t *testing.T) {
+	pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer pool.AssertSize(t, 0)
+
+	var (
+		f1s = []float64{1.1, 1.2, 1.3, 1.4}
+		f2s = []int32{1, 2, 3, 4}
+
+		fields = []arrow.Field{
+			{Name: "f1", Type: arrow.PrimitiveTypes.Float64},
+			{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
+		}
+		dtype = arrow.StructOf(fields...)
+	)
+
+	sb := array.NewStructBuilder(pool, dtype)
+	defer sb.Release()
+
+	f1b := sb.FieldBuilder(0).(*array.Float64Builder)
+	defer f1b.Release()
+
+	f2b := sb.FieldBuilder(1).(*array.Int32Builder)
+	defer f2b.Release()
+
+	if got, want := sb.NumField(), 2; got != want {
+		t.Fatalf("got=%d, want=%d", got, want)
+	}
+
+	for i := range f1s {
+		sb.Append(true)
+		switch i {
+		case 1:
+			f1b.AppendNull()
+			f2b.Append(f2s[i])
+		case 2:
+			f1b.Append(f1s[i])
+			f2b.AppendNull()
+		default:
+			f1b.Append(f1s[i])
+			f2b.Append(f2s[i])
+		}
+	}
+
+	arr := sb.NewArray().(*array.Struct)
+	defer arr.Release()
+
+	want := "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}"
+	got := arr.String()
+	if got != want {
+		t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want)
+	}
+}
diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go
index 9172a5b..8015cbf 100644
--- a/go/arrow/example_test.go
+++ b/go/arrow/example_test.go
@@ -188,6 +188,7 @@ func Example_listArray() {
 		pos = int(offsets[i])
 		fmt.Printf("]\n")
 	}
+	fmt.Printf("List      = %v\n", arr)
 
 	// Output:
 	// NullN()   = 2
@@ -200,6 +201,7 @@ func Example_listArray() {
 	// List[4]   = [6, 7, 8]
 	// List[5]   = (null)
 	// List[6]   = [9]
+	// List      = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
 }
 
 // This example shows how to create a Struct array.
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index 1765e38..26e48fa 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -367,6 +367,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 	case *arrow.ListType:
 		return ctx.loadList(dt)
 
+	case *arrow.StructType:
+		return ctx.loadStruct(dt)
+
 	default:
 		panic(errors.Errorf("array type %T not handled yet", dt))
 	}
@@ -448,6 +451,27 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
 	return array.NewListData(data)
 }
 
+func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) array.Interface {
+	field, buffers := ctx.loadCommon(1)
+
+	arrs := make([]array.Interface, len(dt.Fields()))
+	subs := make([]*array.Data, len(dt.Fields()))
+	for i, f := range dt.Fields() {
+		arrs[i] = ctx.loadChild(f.Type)
+		subs[i] = arrs[i].Data()
+	}
+	defer func() {
+		for i := range arrs {
+			arrs[i].Release()
+		}
+	}()
+
+	data := array.NewData(dt, int(field.Length()), buffers, subs, int(field.NullCount()), 0)
+	defer data.Release()
+
+	return array.NewStructData(data)
+}
+
 func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) {
 	//	msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0)
 	//	var dictBatch flatbuf.DictionaryBatch
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index cfbfd24..11665b9 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -254,6 +254,9 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 		}
 		return arrow.ListOf(children[0].Type), nil
 
+	case flatbuf.TypeStruct_:
+		return arrow.StructOf(children...), nil
+
 	default:
 		// FIXME(sbinet): implement all the other types.
 		panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))