You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/29 08:24:38 UTC

[arrow] branch master updated: ARROW-5111: [Go] implement reading list arrays from Arrow file

This is an automated email from the ASF dual-hosted git repository.

sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9526138  ARROW-5111: [Go] implement reading list arrays from Arrow file
9526138 is described below

commit 9526138d65cc1d1616f76e036d13e6a54440f3bd
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Apr 29 10:24:23 2019 +0200

    ARROW-5111: [Go] implement reading list arrays from Arrow file
    
    Author: Sebastien Binet <bi...@cern.ch>
    
    Closes #4157 from sbinet/issue-5111 and squashes the following commits:
    
    a7fafdd9 <Sebastien Binet> ARROW-5111:  implement reading list arrays from Arrow file
---
 go/arrow/ipc/file_reader.go | 26 ++++++++++++++++++++++++++
 go/arrow/ipc/metadata.go    |  6 ++++++
 2 files changed, 32 insertions(+)

diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index fef552d..1765e38 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -364,6 +364,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
 	case *arrow.BinaryType, *arrow.StringType:
 		return ctx.loadBinary(dt)
 
+	case *arrow.ListType:
+		return ctx.loadList(dt)
+
 	default:
 		panic(errors.Errorf("array type %T not handled yet", dt))
 	}
@@ -385,6 +388,16 @@ func (ctx *arrayLoaderContext) loadCommon(nbufs int) (*flatbuf.FieldNode, []*mem
 	return field, buffers
 }
 
+func (ctx *arrayLoaderContext) loadChild(dt arrow.DataType) array.Interface { // loads one nested array of type dt while tracking nesting depth
+	if ctx.max == 0 { // no nesting budget left: refuse to descend any further
+		panic("arrow/ipc: nested type limit reached")
+	}
+	ctx.max-- // spend one nesting level for the duration of the child load
+	sub := ctx.loadArray(dt)
+	ctx.max++ // hand the level back once the child has been loaded (not reached if loadArray panics)
+	return sub
+}
+
 func (ctx *arrayLoaderContext) loadNull() array.Interface {
 	field, buffers := ctx.loadCommon(1)
 	buffers = append(buffers, ctx.buffer())
@@ -422,6 +435,19 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) array.Interface {
 	return array.MakeFromData(data)
 }
 
+func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface { // builds a list array of type dt from a field node, its buffers and one child array
+	field, buffers := ctx.loadCommon(2) // NOTE(review): 2 is presumably the expected buffer count (validity + offsets) — confirm against loadCommon
+	buffers = append(buffers, ctx.buffer()) // take one more buffer from ctx (presumably the list offsets — confirm)
+
+	sub := ctx.loadChild(dt.Elem()) // element values are loaded through the depth-checked child path
+	defer sub.Release() // drop this function's reference on return (assumes the data built below keeps its own — confirm)
+
+	data := array.NewData(dt, int(field.Length()), buffers, []*array.Data{sub.Data()}, int(field.NullCount()), 0)
+	defer data.Release() // same pattern: release the local reference once the list array has been created
+
+	return array.NewListData(data)
+}
+
 func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) {
 	//	msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0)
 	//	var dictBatch flatbuf.DictionaryBatch
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index f3ecc2e..cfbfd24 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -248,6 +248,12 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
 	case flatbuf.TypeBool:
 		return arrow.FixedWidthTypes.Boolean, nil
 
+	case flatbuf.TypeList:
+		if len(children) != 1 {
+			return nil, errors.Errorf("arrow/ipc: List must have exactly 1 child field (got=%d)", len(children))
+		}
+		return arrow.ListOf(children[0].Type), nil
+
 	default:
 		// FIXME(sbinet): implement all the other types.
 		panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))