You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/01/31 20:00:59 UTC
[arrow] branch master updated: GH-33901: [Go] Add a malloc-based allocator (#33902)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3affadad5d GH-33901: [Go] Add a malloc-based allocator (#33902)
3affadad5d is described below
commit 3affadad5de97005ef587b95638ae4693a7eed1f
Author: David Li <li...@gmail.com>
AuthorDate: Tue Jan 31 15:00:51 2023 -0500
GH-33901: [Go] Add a malloc-based allocator (#33902)
### Rationale for this change
Add a cgo-based allocator for use with the C Data Interface. (See #33900.)
### What changes are included in this PR?
Add a new allocator implementation, `Mallocator`.
### Are these changes tested?
A new test suite was added.
### Are there any user-facing changes?
Adds a new public implementation of an API.
* Closes: #33901
Authored-by: David Li <li...@gmail.com>
Signed-off-by: David Li <li...@gmail.com>
---
go/arrow/memory/allocator.go | 6 -
.../memory/{allocator.go => default_allocator.go} | 12 +-
.../memory/{allocator.go => default_mallocator.go} | 14 +--
.../memory/{doc.go => default_mallocator_test.go} | 19 +++-
go/arrow/memory/doc.go | 2 +
go/arrow/memory/mallocator/mallocator.go | 115 +++++++++++++++++++
go/arrow/memory/mallocator/mallocator_test.go | 125 +++++++++++++++++++++
7 files changed, 264 insertions(+), 29 deletions(-)
diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/allocator.go
index da6be44e37..1427190eaa 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/allocator.go
@@ -25,9 +25,3 @@ type Allocator interface {
Reallocate(size int, b []byte) []byte
Free(b []byte)
}
-
-// DefaultAllocator is a default implementation of Allocator and can be used anywhere
-// an Allocator is required.
-//
-// DefaultAllocator is safe to use from multiple goroutines.
-var DefaultAllocator Allocator = NewGoAllocator()
diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/default_allocator.go
similarity index 88%
copy from go/arrow/memory/allocator.go
copy to go/arrow/memory/default_allocator.go
index da6be44e37..1c63470d1c 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/default_allocator.go
@@ -14,17 +14,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package memory
-
-const (
- alignment = 64
-)
+//go:build !mallocator
-type Allocator interface {
- Allocate(size int) []byte
- Reallocate(size int, b []byte) []byte
- Free(b []byte)
-}
+package memory
// DefaultAllocator is a default implementation of Allocator and can be used anywhere
// an Allocator is required.
diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/default_mallocator.go
similarity index 84%
copy from go/arrow/memory/allocator.go
copy to go/arrow/memory/default_mallocator.go
index da6be44e37..17cd487c66 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/default_mallocator.go
@@ -14,20 +14,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+//go:build mallocator
+
package memory
-const (
- alignment = 64
+import (
+ "github.com/apache/arrow/go/v12/arrow/memory/mallocator"
)
-type Allocator interface {
- Allocate(size int) []byte
- Reallocate(size int, b []byte) []byte
- Free(b []byte)
-}
-
// DefaultAllocator is a default implementation of Allocator and can be used anywhere
// an Allocator is required.
//
// DefaultAllocator is safe to use from multiple goroutines.
-var DefaultAllocator Allocator = NewGoAllocator()
+var DefaultAllocator Allocator = mallocator.NewMallocator()
diff --git a/go/arrow/memory/doc.go b/go/arrow/memory/default_mallocator_test.go
similarity index 71%
copy from go/arrow/memory/doc.go
copy to go/arrow/memory/default_mallocator_test.go
index 959f88b4f2..85ad725571 100644
--- a/go/arrow/memory/doc.go
+++ b/go/arrow/memory/default_mallocator_test.go
@@ -14,7 +14,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-/*
-Package memory provides support for allocating and manipulating memory at a low level.
-*/
-package memory
+//go:build mallocator
+
+package memory_test
+
+import (
+ "testing"
+
+ "github.com/apache/arrow/go/v12/arrow/memory"
+ "github.com/apache/arrow/go/v12/arrow/memory/mallocator"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestDefaultAllocator(t *testing.T) {
+ assert.IsType(t, &mallocator.Mallocator{}, memory.DefaultAllocator)
+}
diff --git a/go/arrow/memory/doc.go b/go/arrow/memory/doc.go
index 959f88b4f2..20a28e4e2a 100644
--- a/go/arrow/memory/doc.go
+++ b/go/arrow/memory/doc.go
@@ -16,5 +16,7 @@
/*
Package memory provides support for allocating and manipulating memory at a low level.
+
+The build tag 'mallocator' will switch the default allocator to one backed by libc malloc. This also requires CGO.
*/
package memory
diff --git a/go/arrow/memory/mallocator/mallocator.go b/go/arrow/memory/mallocator/mallocator.go
new file mode 100644
index 0000000000..18e0377c4f
--- /dev/null
+++ b/go/arrow/memory/mallocator/mallocator.go
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package mallocator
+
+// #include <stdlib.h>
+// #include <string.h>
+//
+// void* realloc_and_initialize(void* ptr, size_t old_len, size_t new_len) {
+// void* new_ptr = realloc(ptr, new_len);
+// if (new_ptr && new_len > old_len) {
+// memset(new_ptr + old_len, 0, new_len - old_len);
+// }
+// return new_ptr;
+// }
+import "C"
+
+import (
+ "reflect"
+ "sync/atomic"
+ "unsafe"
+)
+
+// Mallocator is an allocator which defers to libc malloc.
+//
+// The priamry reason to use this is when exporting data across the C Data
+// Interface. CGO requires that pointers to Go memory are not stored in C
+// memory, which is exactly what the C Data Interface would otherwise
+// require. By allocating with Mallocator up front, we can safely export the
+// buffers in Arrow arrays without copying buffers or violating CGO rules.
+//
+// The build tag 'mallocator' will also make this the default allocator.
+type Mallocator struct {
+ allocatedBytes uint64
+}
+
+func NewMallocator() *Mallocator { return &Mallocator{} }
+
+func (alloc *Mallocator) Allocate(size int) []byte {
+ // Use calloc to zero-initialize memory.
+ // > ...the current implementation may sometimes cause a runtime error if the
+ // > contents of the C memory appear to be a Go pointer. Therefore, avoid
+ // > passing uninitialized C memory to Go code if the Go code is going to store
+ // > pointer values in it. Zero out the memory in C before passing it to Go.
+ if size < 0 {
+ panic("mallocator: negative size")
+ }
+ ptr, err := C.calloc(C.size_t(size), 1)
+ if err != nil {
+ panic(err)
+ } else if ptr == nil {
+ panic("mallocator: out of memory")
+ }
+ atomic.AddUint64(&alloc.allocatedBytes, uint64(size))
+ return unsafe.Slice((*byte)(ptr), size)
+}
+
+func (alloc *Mallocator) Free(b []byte) {
+ sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+ C.free(unsafe.Pointer(sh.Data))
+ // Subtract sh.Len via two's complement (since atomic doesn't offer subtract)
+ atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(sh.Len) - 1))
+}
+
+func (alloc *Mallocator) Reallocate(size int, b []byte) []byte {
+ if size < 0 {
+ panic("mallocator: negative size")
+ }
+ sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+ ptr, err := C.realloc_and_initialize(unsafe.Pointer(sh.Data), C.size_t(sh.Cap), C.size_t(size))
+ if err != nil {
+ panic(err)
+ } else if ptr == nil && size != 0 {
+ panic("mallocator: out of memory")
+ }
+ delta := size - len(b)
+ if delta >= 0 {
+ atomic.AddUint64(&alloc.allocatedBytes, uint64(delta))
+ } else {
+ atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(-delta) - 1))
+ }
+ return unsafe.Slice((*byte)(ptr), size)
+}
+
+func (alloc *Mallocator) AllocatedBytes() int64 {
+ return int64(alloc.allocatedBytes)
+}
+
+// Duplicate interface to avoid circular import
+type TestingT interface {
+ Errorf(format string, args ...interface{})
+ Helper()
+}
+
+func (alloc *Mallocator) AssertSize(t TestingT, sz int) {
+ cur := alloc.AllocatedBytes()
+ if int64(sz) != cur {
+ t.Helper()
+ t.Errorf("invalid memory size exp=%d, got=%d", sz, cur)
+ }
+}
diff --git a/go/arrow/memory/mallocator/mallocator_test.go b/go/arrow/memory/mallocator/mallocator_test.go
new file mode 100644
index 0000000000..40e8876b28
--- /dev/null
+++ b/go/arrow/memory/mallocator/mallocator_test.go
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package mallocator_test
+
+import (
+ "fmt"
+ "testing"
+
+ "github.com/apache/arrow/go/v12/arrow/memory/mallocator"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMallocatorAllocate(t *testing.T) {
+ sizes := []int{0, 1, 4, 33, 65, 4095, 4096, 8193}
+ for _, size := range sizes {
+ t.Run(fmt.Sprint(size), func(t *testing.T) {
+ a := mallocator.NewMallocator()
+ buf := a.Allocate(size)
+ defer a.Free(buf)
+
+ assert.Equal(t, size, len(buf))
+ assert.LessOrEqual(t, size, cap(buf))
+ // check 0-initialized
+ for idx, c := range buf {
+ assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+ }
+ })
+ }
+}
+
+func TestMallocatorReallocate(t *testing.T) {
+ sizes := []struct {
+ before, after int
+ }{
+ {0, 1},
+ {1, 0},
+ {1, 2},
+ {1, 33},
+ {4, 4},
+ {32, 16},
+ {32, 1},
+ }
+ for _, test := range sizes {
+ t.Run(fmt.Sprintf("%dTo%d", test.before, test.after), func(t *testing.T) {
+ a := mallocator.NewMallocator()
+ buf := a.Allocate(test.before)
+
+ assert.Equal(t, test.before, len(buf))
+ assert.LessOrEqual(t, test.before, cap(buf))
+ // check 0-initialized
+ for idx, c := range buf {
+ assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+ }
+
+ buf = a.Reallocate(test.after, buf)
+ defer a.Free(buf)
+ assert.Equal(t, test.after, len(buf))
+ assert.LessOrEqual(t, test.after, cap(buf))
+ // check 0-initialized
+ for idx, c := range buf {
+ assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+ }
+ })
+ }
+}
+
+func TestMallocatorAssertSize(t *testing.T) {
+ a := mallocator.NewMallocator()
+ assert.Equal(t, int64(0), a.AllocatedBytes())
+
+ buf1 := a.Allocate(64)
+ a.AssertSize(t, 64)
+
+ buf2 := a.Allocate(128)
+ a.AssertSize(t, 192)
+ assert.Equal(t, int64(192), a.AllocatedBytes())
+
+ a.Free(buf1)
+ a.AssertSize(t, 128)
+ assert.Equal(t, int64(128), a.AllocatedBytes())
+
+ buf2 = a.Reallocate(256, buf2)
+ a.AssertSize(t, 256)
+ assert.Equal(t, int64(256), a.AllocatedBytes())
+
+ buf2 = a.Reallocate(64, buf2)
+ a.AssertSize(t, 64)
+ assert.Equal(t, int64(64), a.AllocatedBytes())
+
+ a.Free(buf2)
+ a.AssertSize(t, 0)
+ assert.Equal(t, int64(0), a.AllocatedBytes())
+}
+
+func TestMallocatorAllocateNegative(t *testing.T) {
+ a := mallocator.NewMallocator()
+ assert.PanicsWithValue(t, "mallocator: negative size", func() {
+ a.Allocate(-1)
+ })
+}
+
+func TestMallocatorReallocateNegative(t *testing.T) {
+ a := mallocator.NewMallocator()
+ buf := a.Allocate(1)
+ defer a.Free(buf)
+
+ assert.PanicsWithValue(t, "mallocator: negative size", func() {
+ a.Reallocate(-1, buf)
+ })
+}