You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/06/15 10:49:12 UTC

[GitHub] [arrow] pitrou commented on a diff in pull request #13373: ARROW-16717: [C++] Add support for system jemalloc

pitrou commented on code in PR #13373:
URL: https://github.com/apache/arrow/pull/13373#discussion_r897824453


##########
cpp/src/arrow/memory_pool_jemalloc.cc:
##########
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool_internal.h"
+#include "arrow/util/logging.h"  // IWYU pragma: keep
+
+// We can't put the jemalloc memory pool implementation into
+// memory_pool.c because jemalloc.h may redefine malloc() and its
+// family by macros. It malloc() and its family are redefined by
+// jemalloc, our system memory pool also uses jemalloc's malloc() and
+// its family.

Review Comment:
   ```suggestion
   // family by macros. If malloc() and its family are redefined by
   // jemalloc, our system memory pool will also use jemalloc's malloc() and
   // its family.
   ```



##########
cpp/src/arrow/memory_pool_jemalloc.cc:
##########
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/memory_pool_internal.h"
+#include "arrow/util/logging.h"  // IWYU pragma: keep
+
+// We can't put the jemalloc memory pool implementation into
+// memory_pool.c because jemalloc.h may redefine malloc() and its
+// family by macros. It malloc() and its family are redefined by
+// jemalloc, our system memory pool also uses jemalloc's malloc() and
+// its family.
+
+#ifdef ARROW_JEMALLOC_VENDORED
+#define JEMALLOC_MANGLE
+// Explicitly link to our version of jemalloc
+#include "jemalloc_ep/dist/include/jemalloc/jemalloc.h"
+#else
+#include <jemalloc/jemalloc.h>
+#endif
+
+#ifdef ARROW_JEMALLOC_VENDORED
+// Compile-time configuration for vendored jemalloc options.
+// Note the prefix ("je_arrow_") must match the symbol prefix given when
+// building jemalloc.
+// See discussion in https://github.com/jemalloc/jemalloc/issues/1621
+
+// ARROW-6910(wesm): we found that jemalloc's default behavior with respect to
+// dirty / muzzy pages (see definitions of these in the jemalloc documentation)
+// conflicted with user expectations, and would even cause memory use problems
+// in some cases. By enabling the background_thread option and reducing the
+// decay time from 10 seconds to 1 seconds, memory is released more
+// aggressively (and in the background) to the OS. This can be configured
+// further by using the arrow::jemalloc_set_decay_ms API
+
+#undef USE_JEMALLOC_BACKGROUND_THREAD
+#ifndef __APPLE__
+// ARROW-6977: jemalloc's background_thread isn't always enabled on macOS
+#define USE_JEMALLOC_BACKGROUND_THREAD
+#endif
+
+// In debug mode, add memory poisoning on alloc / free
+#ifdef NDEBUG
+#define JEMALLOC_DEBUG_OPTIONS ""
+#else
+#define JEMALLOC_DEBUG_OPTIONS ",junk:true"
+#endif
+
+const char* je_arrow_malloc_conf =
+    ("oversize_threshold:0"
+#ifdef USE_JEMALLOC_BACKGROUND_THREAD
+     ",dirty_decay_ms:1000"
+     ",muzzy_decay_ms:1000"
+     ",background_thread:true"
+#else
+     // ARROW-6994: return memory immediately to the OS if the
+     // background_thread option isn't available
+     ",dirty_decay_ms:0"
+     ",muzzy_decay_ms:0"
+#endif
+     JEMALLOC_DEBUG_OPTIONS);  // NOLINT: whitespace/parens
+
+#endif  // ARROW_JEMALLOC_VENDORED
+
+namespace arrow {
+
+namespace memory_pool {
+
+namespace internal {
+
+Status JemallocAllocator::AllocateAligned(int64_t size, uint8_t** out) {
+  if (size == 0) {
+    *out = kZeroSizeArea;
+    return Status::OK();
+  }
+  *out = reinterpret_cast<uint8_t*>(
+      mallocx(static_cast<size_t>(size), MALLOCX_ALIGN(kAlignment)));
+  if (*out == NULL) {
+    return Status::OutOfMemory("malloc of size ", size, " failed");
+  }
+  return Status::OK();
+}
+
+Status JemallocAllocator::ReallocateAligned(int64_t old_size, int64_t new_size,
+                                            uint8_t** ptr) {
+  uint8_t* previous_ptr = *ptr;
+  if (previous_ptr == kZeroSizeArea) {
+    DCHECK_EQ(old_size, 0);
+    return AllocateAligned(new_size, ptr);
+  }
+  if (new_size == 0) {
+    DeallocateAligned(previous_ptr, old_size);
+    *ptr = kZeroSizeArea;
+    return Status::OK();
+  }
+  *ptr = reinterpret_cast<uint8_t*>(
+      rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
+  if (*ptr == NULL) {
+    *ptr = previous_ptr;
+    return Status::OutOfMemory("realloc of size ", new_size, " failed");
+  }
+  return Status::OK();
+}
+
+void JemallocAllocator::DeallocateAligned(uint8_t* ptr, int64_t size) {
+  if (ptr == kZeroSizeArea) {
+    DCHECK_EQ(size, 0);
+  } else {
+    dallocx(ptr, MALLOCX_ALIGN(kAlignment));
+  }
+}
+
+void JemallocAllocator::ReleaseUnused() {
+  mallctl("arena." ARROW_STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, 0);
+}
+
+}  // namespace internal
+
+}  // namespace memory_pool
+
+#define RETURN_IF_JEMALLOC_ERROR(ERR)                  \

Review Comment:
   Can we `#undef` it after using it?



##########
dev/tasks/linux-packages/apache-arrow/debian/libarrow-dev.install:
##########
@@ -5,7 +5,7 @@ usr/lib/*/cmake/arrow/ArrowTargets*.cmake
 usr/lib/*/cmake/arrow/Find*Alt.cmake
 usr/lib/*/cmake/arrow/FindArrow.cmake
 usr/lib/*/cmake/arrow/FindBrotli.cmake
-usr/lib/*/cmake/arrow/Find[STuz]*.cmake
+usr/lib/*/cmake/arrow/Find[STjuz]*.cmake

Review Comment:
   er... what's this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org