You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2023/04/10 18:51:01 UTC
[arrow] branch main updated: GH-34853: [Go] Add TotalRecordSize, TotalArraySize (#34854)
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2fe17338e2 GH-34853: [Go] Add TotalRecordSize, TotalArraySize (#34854)
2fe17338e2 is described below
commit 2fe17338e2d1f85d0c2685d31d2dd51f138b6b80
Author: Yevgeny Pats <ye...@gmail.com>
AuthorDate: Mon Apr 10 14:50:55 2023 -0400
GH-34853: [Go] Add TotalRecordSize, TotalArraySize (#34854)
Closes https://github.com/apache/arrow/issues/34853
* Closes: #34853
Authored-by: Yevgeny Pats <16...@users.noreply.github.com>
Signed-off-by: Matt Topol <zo...@gmail.com>
---
go/arrow/util/byte_size.go | 79 +++++++++++++++++++++++++++++
go/arrow/util/byte_size_test.go | 110 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 189 insertions(+)
diff --git a/go/arrow/util/byte_size.go b/go/arrow/util/byte_size.go
new file mode 100644
index 0000000000..db08e8d27b
--- /dev/null
+++ b/go/arrow/util/byte_size.go
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+ "github.com/apache/arrow/go/v12/arrow"
+ "github.com/apache/arrow/go/v12/arrow/array"
+ "github.com/apache/arrow/go/v12/arrow/memory"
+)
+
+func isArrayDataNil(arrayData arrow.ArrayData) bool {
+ if arrayData == nil {
+ return true
+ }
+ if v, ok := arrayData.(*array.Data); ok {
+ return v == nil
+ }
+ panic("unknown ArrayData type")
+}
+
+// totalArrayDataSize returns the combined Len() of every non-nil buffer
+// reachable from arrayData: its own buffers, those of its children
+// (recursively), and those of its dictionary when one is present.
+//
+// seenBuffers records the buffer pointers that have already been counted, so a
+// buffer referenced more than once contributes its length only once. The map is
+// updated in place and shared across the recursive calls.
+func totalArrayDataSize(arrayData arrow.ArrayData, seenBuffers map[*memory.Buffer]struct{}) int64 {
+	var total int64
+	for _, b := range arrayData.Buffers() {
+		if b == nil {
+			continue
+		}
+		if _, counted := seenBuffers[b]; counted {
+			continue
+		}
+		seenBuffers[b] = struct{}{}
+		total += int64(b.Len())
+	}
+	for _, kid := range arrayData.Children() {
+		total += totalArrayDataSize(kid, seenBuffers)
+	}
+	if d := arrayData.Dictionary(); !isArrayDataNil(d) {
+		total += totalArrayDataSize(d, seenBuffers)
+	}
+	return total
+}
+
+// totalArraySize returns the byte size of the buffers behind arr's ArrayData,
+// skipping any buffer already recorded in seenBuffers.
+func totalArraySize(arr arrow.Array, seenBuffers map[*memory.Buffer]struct{}) int64 {
+	data := arr.Data()
+	return totalArrayDataSize(data, seenBuffers)
+}
+
+func totalRecordSize(record arrow.Record, seenBuffers map[*memory.Buffer]struct{}) int64 {
+ var sum int64
+ for _, c := range record.Columns() {
+ sum += totalArraySize(c, seenBuffers)
+ }
+ return sum
+}
+
+// TotalArraySize returns the total number of bytes held in the buffers
+// referenced by arr. A buffer that is referenced more than once is counted a
+// single time.
+func TotalArraySize(arr arrow.Array) int64 {
+	return totalArraySize(arr, map[*memory.Buffer]struct{}{})
+}
+
+// TotalRecordSize returns the total number of bytes held in the buffers
+// referenced by the columns of record. A buffer that is referenced more than
+// once is counted a single time.
+func TotalRecordSize(record arrow.Record) int64 {
+	return totalRecordSize(record, map[*memory.Buffer]struct{}{})
+}
diff --git a/go/arrow/util/byte_size_test.go b/go/arrow/util/byte_size_test.go
new file mode 100644
index 0000000000..794aaf1953
--- /dev/null
+++ b/go/arrow/util/byte_size_test.go
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/apache/arrow/go/v12/arrow"
+ "github.com/apache/arrow/go/v12/arrow/array"
+ "github.com/apache/arrow/go/v12/arrow/memory"
+ "github.com/apache/arrow/go/v12/arrow/util"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestTotalArrayReusedBuffers checks that a record whose two columns are the
+// same array reports the same size as a record holding that array once.
+func TestTotalArrayReusedBuffers(t *testing.T) {
+	alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer alloc.AssertSize(t, 0)
+
+	boolBldr := array.NewBooleanBuilder(alloc)
+	defer boolBldr.Release()
+	boolBldr.Append(true)
+	col := boolBldr.NewArray()
+	defer col.Release()
+
+	// The same array backs both columns of this record.
+	twoColSchema := arrow.NewSchema([]arrow.Field{
+		{Name: "a", Type: arrow.FixedWidthTypes.Boolean},
+		{Name: "b", Type: arrow.FixedWidthTypes.Boolean},
+	}, nil)
+	shared := array.NewRecord(twoColSchema, []arrow.Array{col, col}, 1)
+	defer shared.Release()
+
+	assert.Equal(t, int64(5), util.TotalRecordSize(shared))
+
+	oneColSchema := arrow.NewSchema([]arrow.Field{
+		{Name: "a", Type: arrow.FixedWidthTypes.Boolean},
+	}, nil)
+	single := array.NewRecord(oneColSchema, []arrow.Array{col}, 1)
+	defer single.Release()
+
+	// Both records must report the same size, since the two-column record
+	// reuses the same buffer for each of its columns.
+	assert.Equal(t, int64(5), util.TotalRecordSize(single))
+}
+
+// TestTotalArraySizeBasic checks TotalArraySize on flat arrays: an int16 array
+// without nulls, an int16 array containing a null, and an empty boolean array.
+func TestTotalArraySizeBasic(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	noNulls, _, err := array.FromJSON(mem,
+		arrow.PrimitiveTypes.Int16,
+		strings.NewReader("[1, 2, 3]"))
+	if !assert.NoError(t, err) {
+		// assert does not stop the test; bail out here so the array is not
+		// used after a failed construction.
+		t.FailNow()
+	}
+	defer noNulls.Release()
+	assert.Equal(t, int64(10), util.TotalArraySize(noNulls))
+
+	withNulls, _, err := array.FromJSON(mem,
+		arrow.PrimitiveTypes.Int16,
+		strings.NewReader("[1, 2, 3, 4, null, 6, 7, 8, 9]"))
+	if !assert.NoError(t, err) {
+		// Same reasoning as above: do not touch withNulls after an error.
+		t.FailNow()
+	}
+	defer withNulls.Release()
+	assert.Equal(t, int64(22), util.TotalArraySize(withNulls))
+
+	bldr := array.NewBooleanBuilder(mem)
+	defer bldr.Release()
+
+	// Nothing was appended, so the resulting array is expected to report 0 bytes.
+	arr := bldr.NewArray()
+	defer arr.Release()
+
+	assert.Equal(t, int64(0), util.TotalArraySize(arr))
+}
+
+// TestTotalArraySizeNested checks TotalArraySize on a list-of-int64 array
+// built from JSON that includes a null list entry.
+func TestTotalArraySizeNested(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer mem.AssertSize(t, 0)
+
+	arrayWithChildren, _, err := array.FromJSON(mem,
+		arrow.ListOf(arrow.PrimitiveTypes.Int64),
+		strings.NewReader("[[0, 1, 2, 3, 4], [5], null]"))
+	if !assert.NoError(t, err) {
+		// assert does not stop the test; bail out here so the array is not
+		// used after a failed construction.
+		t.FailNow()
+	}
+	defer arrayWithChildren.Release()
+	assert.Equal(t, int64(72), util.TotalArraySize(arrayWithChildren))
+}
+
+// TestTotalArraySizeRecord checks TotalRecordSize on a record with one int32
+// column and one int64 column of three values each.
+func TestTotalArraySizeRecord(t *testing.T) {
+	alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+	defer alloc.AssertSize(t, 0)
+
+	schema := arrow.NewSchema([]arrow.Field{
+		{Name: "a", Type: arrow.PrimitiveTypes.Int32},
+		{Name: "b", Type: arrow.PrimitiveTypes.Int64},
+	}, nil)
+	rb := array.NewRecordBuilder(alloc, schema)
+	defer rb.Release()
+
+	int32Col := rb.Field(0).(*array.Int32Builder)
+	int32Col.AppendValues([]int32{1, 2, 3}, nil)
+	int64Col := rb.Field(1).(*array.Int64Builder)
+	int64Col.AppendValues([]int64{4, 5, 6}, nil)
+
+	rec := rb.NewRecord()
+	defer rec.Release()
+
+	assert.Equal(t, int64(44), util.TotalRecordSize(rec))
+}