You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by sb...@apache.org on 2019/04/29 08:24:38 UTC
[arrow] branch master updated: ARROW-5111: [Go] implement reading list arrays from Arrow file
This is an automated email from the ASF dual-hosted git repository.
sbinet pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9526138 ARROW-5111: [Go] implement reading list arrays from Arrow file
9526138 is described below
commit 9526138d65cc1d1616f76e036d13e6a54440f3bd
Author: Sebastien Binet <bi...@cern.ch>
AuthorDate: Mon Apr 29 10:24:23 2019 +0200
ARROW-5111: [Go] implement reading list arrays from Arrow file
Author: Sebastien Binet <bi...@cern.ch>
Closes #4157 from sbinet/issue-5111 and squashes the following commits:
a7fafdd9 <Sebastien Binet> ARROW-5111: implement reading list arrays from Arrow file
---
go/arrow/ipc/file_reader.go | 26 ++++++++++++++++++++++++++
go/arrow/ipc/metadata.go | 6 ++++++
2 files changed, 32 insertions(+)
diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go
index fef552d..1765e38 100644
--- a/go/arrow/ipc/file_reader.go
+++ b/go/arrow/ipc/file_reader.go
@@ -364,6 +364,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
case *arrow.BinaryType, *arrow.StringType:
return ctx.loadBinary(dt)
+ case *arrow.ListType:
+ return ctx.loadList(dt)
+
default:
panic(errors.Errorf("array type %T not handled yet", dt))
}
@@ -385,6 +388,16 @@ func (ctx *arrayLoaderContext) loadCommon(nbufs int) (*flatbuf.FieldNode, []*mem
return field, buffers
}
+func (ctx *arrayLoaderContext) loadChild(dt arrow.DataType) array.Interface {
+ if ctx.max == 0 {
+ panic("arrow/ipc: nested type limit reached")
+ }
+ ctx.max--
+ sub := ctx.loadArray(dt)
+ ctx.max++
+ return sub
+}
+
func (ctx *arrayLoaderContext) loadNull() array.Interface {
field, buffers := ctx.loadCommon(1)
buffers = append(buffers, ctx.buffer())
@@ -422,6 +435,19 @@ func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) array.Interface {
return array.MakeFromData(data)
}
+func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
+ field, buffers := ctx.loadCommon(2)
+ buffers = append(buffers, ctx.buffer())
+
+ sub := ctx.loadChild(dt.Elem())
+ defer sub.Release()
+
+ data := array.NewData(dt, int(field.Length()), buffers, []*array.Data{sub.Data()}, int(field.NullCount()), 0)
+ defer data.Release()
+
+ return array.NewListData(data)
+}
+
func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) {
// msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0)
// var dictBatch flatbuf.DictionaryBatch
diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go
index f3ecc2e..cfbfd24 100644
--- a/go/arrow/ipc/metadata.go
+++ b/go/arrow/ipc/metadata.go
@@ -248,6 +248,12 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
case flatbuf.TypeBool:
return arrow.FixedWidthTypes.Boolean, nil
+ case flatbuf.TypeList:
+ if len(children) != 1 {
+ return nil, errors.Errorf("arrow/ipc: List must have exactly 1 child field (got=%d)", len(children))
+ }
+ return arrow.ListOf(children[0].Type), nil
+
default:
// FIXME(sbinet): implement all the other types.
panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))