You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/01/31 20:00:59 UTC

[arrow] branch master updated: GH-33901: [Go] Add a malloc-based allocator (#33902)

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3affadad5d GH-33901: [Go] Add a malloc-based allocator (#33902)
3affadad5d is described below

commit 3affadad5de97005ef587b95638ae4693a7eed1f
Author: David Li <li...@gmail.com>
AuthorDate: Tue Jan 31 15:00:51 2023 -0500

    GH-33901: [Go] Add a malloc-based allocator (#33902)
    
    ### Rationale for this change
    
    Add a cgo-based allocator for use with the C Data Interface. (See #33900.)
    
    ### What changes are included in this PR?
    
    Add a new allocator implementation, `Mallocator`.
    
    ### Are these changes tested?
    
    A new test suite was added.
    
    ### Are there any user-facing changes?
    
    Adds a new public implementation of an API.
    * Closes: #33901
    
    Authored-by: David Li <li...@gmail.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 go/arrow/memory/allocator.go                       |   6 -
 .../memory/{allocator.go => default_allocator.go}  |  12 +-
 .../memory/{allocator.go => default_mallocator.go} |  14 +--
 .../memory/{doc.go => default_mallocator_test.go}  |  19 +++-
 go/arrow/memory/doc.go                             |   2 +
 go/arrow/memory/mallocator/mallocator.go           | 115 +++++++++++++++++++
 go/arrow/memory/mallocator/mallocator_test.go      | 125 +++++++++++++++++++++
 7 files changed, 264 insertions(+), 29 deletions(-)

diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/allocator.go
index da6be44e37..1427190eaa 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/allocator.go
@@ -25,9 +25,3 @@ type Allocator interface {
 	Reallocate(size int, b []byte) []byte
 	Free(b []byte)
 }
-
-// DefaultAllocator is a default implementation of Allocator and can be used anywhere
-// an Allocator is required.
-//
-// DefaultAllocator is safe to use from multiple goroutines.
-var DefaultAllocator Allocator = NewGoAllocator()
diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/default_allocator.go
similarity index 88%
copy from go/arrow/memory/allocator.go
copy to go/arrow/memory/default_allocator.go
index da6be44e37..1c63470d1c 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/default_allocator.go
@@ -14,17 +14,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-package memory
-
-const (
-	alignment = 64
-)
+//go:build !mallocator
 
-type Allocator interface {
-	Allocate(size int) []byte
-	Reallocate(size int, b []byte) []byte
-	Free(b []byte)
-}
+package memory
 
 // DefaultAllocator is a default implementation of Allocator and can be used anywhere
 // an Allocator is required.
diff --git a/go/arrow/memory/allocator.go b/go/arrow/memory/default_mallocator.go
similarity index 84%
copy from go/arrow/memory/allocator.go
copy to go/arrow/memory/default_mallocator.go
index da6be44e37..17cd487c66 100644
--- a/go/arrow/memory/allocator.go
+++ b/go/arrow/memory/default_mallocator.go
@@ -14,20 +14,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build mallocator
+
 package memory
 
-const (
-	alignment = 64
+import (
+	"github.com/apache/arrow/go/v12/arrow/memory/mallocator"
 )
 
-type Allocator interface {
-	Allocate(size int) []byte
-	Reallocate(size int, b []byte) []byte
-	Free(b []byte)
-}
-
 // DefaultAllocator is a default implementation of Allocator and can be used anywhere
 // an Allocator is required.
 //
 // DefaultAllocator is safe to use from multiple goroutines.
-var DefaultAllocator Allocator = NewGoAllocator()
+var DefaultAllocator Allocator = mallocator.NewMallocator()
diff --git a/go/arrow/memory/doc.go b/go/arrow/memory/default_mallocator_test.go
similarity index 71%
copy from go/arrow/memory/doc.go
copy to go/arrow/memory/default_mallocator_test.go
index 959f88b4f2..85ad725571 100644
--- a/go/arrow/memory/doc.go
+++ b/go/arrow/memory/default_mallocator_test.go
@@ -14,7 +14,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-/*
-Package memory provides support for allocating and manipulating memory at a low level.
-*/
-package memory
+//go:build mallocator
+
+package memory_test
+
+import (
+	"testing"
+
+	"github.com/apache/arrow/go/v12/arrow/memory"
+	"github.com/apache/arrow/go/v12/arrow/memory/mallocator"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDefaultAllocator(t *testing.T) {
+	assert.IsType(t, &mallocator.Mallocator{}, memory.DefaultAllocator)
+}
diff --git a/go/arrow/memory/doc.go b/go/arrow/memory/doc.go
index 959f88b4f2..20a28e4e2a 100644
--- a/go/arrow/memory/doc.go
+++ b/go/arrow/memory/doc.go
@@ -16,5 +16,7 @@
 
 /*
 Package memory provides support for allocating and manipulating memory at a low level.
+
+The build tag 'mallocator' will switch the default allocator to one backed by libc malloc. This also requires CGO.
 */
 package memory
diff --git a/go/arrow/memory/mallocator/mallocator.go b/go/arrow/memory/mallocator/mallocator.go
new file mode 100644
index 0000000000..18e0377c4f
--- /dev/null
+++ b/go/arrow/memory/mallocator/mallocator.go
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package mallocator
+
+// #include <stdlib.h>
+// #include <string.h>
+//
+// void* realloc_and_initialize(void* ptr, size_t old_len, size_t new_len) {
+//   void* new_ptr = realloc(ptr, new_len);
+//   if (new_ptr && new_len > old_len) {
+//     memset(new_ptr + old_len, 0, new_len - old_len);
+//   }
+//   return new_ptr;
+// }
+import "C"
+
+import (
+	"reflect"
+	"sync/atomic"
+	"unsafe"
+)
+
+// Mallocator is an allocator which defers to libc malloc.
+//
+// The priamry reason to use this is when exporting data across the C Data
+// Interface. CGO requires that pointers to Go memory are not stored in C
+// memory, which is exactly what the C Data Interface would otherwise
+// require. By allocating with Mallocator up front, we can safely export the
+// buffers in Arrow arrays without copying buffers or violating CGO rules.
+//
+// The build tag 'mallocator' will also make this the default allocator.
+type Mallocator struct {
+	allocatedBytes uint64
+}
+
+func NewMallocator() *Mallocator { return &Mallocator{} }
+
+func (alloc *Mallocator) Allocate(size int) []byte {
+	// Use calloc to zero-initialize memory.
+	// > ...the current implementation may sometimes cause a runtime error if the
+	// > contents of the C memory appear to be a Go pointer. Therefore, avoid
+	// > passing uninitialized C memory to Go code if the Go code is going to store
+	// > pointer values in it. Zero out the memory in C before passing it to Go.
+	if size < 0 {
+		panic("mallocator: negative size")
+	}
+	ptr, err := C.calloc(C.size_t(size), 1)
+	if err != nil {
+		panic(err)
+	} else if ptr == nil {
+		panic("mallocator: out of memory")
+	}
+	atomic.AddUint64(&alloc.allocatedBytes, uint64(size))
+	return unsafe.Slice((*byte)(ptr), size)
+}
+
+func (alloc *Mallocator) Free(b []byte) {
+	sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+	C.free(unsafe.Pointer(sh.Data))
+	// Subtract sh.Len via two's complement (since atomic doesn't offer subtract)
+	atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(sh.Len) - 1))
+}
+
+func (alloc *Mallocator) Reallocate(size int, b []byte) []byte {
+	if size < 0 {
+		panic("mallocator: negative size")
+	}
+	sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+	ptr, err := C.realloc_and_initialize(unsafe.Pointer(sh.Data), C.size_t(sh.Cap), C.size_t(size))
+	if err != nil {
+		panic(err)
+	} else if ptr == nil && size != 0 {
+		panic("mallocator: out of memory")
+	}
+	delta := size - len(b)
+	if delta >= 0 {
+		atomic.AddUint64(&alloc.allocatedBytes, uint64(delta))
+	} else {
+		atomic.AddUint64(&alloc.allocatedBytes, ^(uint64(-delta) - 1))
+	}
+	return unsafe.Slice((*byte)(ptr), size)
+}
+
+func (alloc *Mallocator) AllocatedBytes() int64 {
+	return int64(alloc.allocatedBytes)
+}
+
+// Duplicate interface to avoid circular import
+type TestingT interface {
+	Errorf(format string, args ...interface{})
+	Helper()
+}
+
+func (alloc *Mallocator) AssertSize(t TestingT, sz int) {
+	cur := alloc.AllocatedBytes()
+	if int64(sz) != cur {
+		t.Helper()
+		t.Errorf("invalid memory size exp=%d, got=%d", sz, cur)
+	}
+}
diff --git a/go/arrow/memory/mallocator/mallocator_test.go b/go/arrow/memory/mallocator/mallocator_test.go
new file mode 100644
index 0000000000..40e8876b28
--- /dev/null
+++ b/go/arrow/memory/mallocator/mallocator_test.go
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package mallocator_test
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/apache/arrow/go/v12/arrow/memory/mallocator"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMallocatorAllocate(t *testing.T) {
+	sizes := []int{0, 1, 4, 33, 65, 4095, 4096, 8193}
+	for _, size := range sizes {
+		t.Run(fmt.Sprint(size), func(t *testing.T) {
+			a := mallocator.NewMallocator()
+			buf := a.Allocate(size)
+			defer a.Free(buf)
+
+			assert.Equal(t, size, len(buf))
+			assert.LessOrEqual(t, size, cap(buf))
+			// check 0-initialized
+			for idx, c := range buf {
+				assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+			}
+		})
+	}
+}
+
+func TestMallocatorReallocate(t *testing.T) {
+	sizes := []struct {
+		before, after int
+	}{
+		{0, 1},
+		{1, 0},
+		{1, 2},
+		{1, 33},
+		{4, 4},
+		{32, 16},
+		{32, 1},
+	}
+	for _, test := range sizes {
+		t.Run(fmt.Sprintf("%dTo%d", test.before, test.after), func(t *testing.T) {
+			a := mallocator.NewMallocator()
+			buf := a.Allocate(test.before)
+
+			assert.Equal(t, test.before, len(buf))
+			assert.LessOrEqual(t, test.before, cap(buf))
+			// check 0-initialized
+			for idx, c := range buf {
+				assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+			}
+
+			buf = a.Reallocate(test.after, buf)
+			defer a.Free(buf)
+			assert.Equal(t, test.after, len(buf))
+			assert.LessOrEqual(t, test.after, cap(buf))
+			// check 0-initialized
+			for idx, c := range buf {
+				assert.Equal(t, uint8(0), c, fmt.Sprintf("Buf not zero-initialized at %d", idx))
+			}
+		})
+	}
+}
+
+func TestMallocatorAssertSize(t *testing.T) {
+	a := mallocator.NewMallocator()
+	assert.Equal(t, int64(0), a.AllocatedBytes())
+
+	buf1 := a.Allocate(64)
+	a.AssertSize(t, 64)
+
+	buf2 := a.Allocate(128)
+	a.AssertSize(t, 192)
+	assert.Equal(t, int64(192), a.AllocatedBytes())
+
+	a.Free(buf1)
+	a.AssertSize(t, 128)
+	assert.Equal(t, int64(128), a.AllocatedBytes())
+
+	buf2 = a.Reallocate(256, buf2)
+	a.AssertSize(t, 256)
+	assert.Equal(t, int64(256), a.AllocatedBytes())
+
+	buf2 = a.Reallocate(64, buf2)
+	a.AssertSize(t, 64)
+	assert.Equal(t, int64(64), a.AllocatedBytes())
+
+	a.Free(buf2)
+	a.AssertSize(t, 0)
+	assert.Equal(t, int64(0), a.AllocatedBytes())
+}
+
+func TestMallocatorAllocateNegative(t *testing.T) {
+	a := mallocator.NewMallocator()
+	assert.PanicsWithValue(t, "mallocator: negative size", func() {
+		a.Allocate(-1)
+	})
+}
+
+func TestMallocatorReallocateNegative(t *testing.T) {
+	a := mallocator.NewMallocator()
+	buf := a.Allocate(1)
+	defer a.Free(buf)
+
+	assert.PanicsWithValue(t, "mallocator: negative size", func() {
+		a.Reallocate(-1, buf)
+	})
+}