You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by so...@apache.org on 2015/06/08 17:40:23 UTC
trafficserver git commit: TS-3122: Add support for hugepages on Linux
Repository: trafficserver
Updated Branches:
refs/heads/master 1a0832b3b -> bba557870
TS-3122: Add support for hugepages on Linux
Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787
Branch: refs/heads/master
Commit: bba557870c05222d302a05ec948871cdde8bf63b
Parents: 1a0832b
Author: Phil Sorber <so...@apache.org>
Authored: Thu Oct 16 19:58:08 2014 -0600
Committer: Phil Sorber <so...@apache.org>
Committed: Mon Jun 8 09:28:56 2015 -0600
----------------------------------------------------------------------
.../configuration/records.config.en.rst | 12 ++
iocore/cache/Cache.cc | 10 +-
iocore/cache/CacheDir.cc | 53 +++++--
iocore/cache/P_CacheDir.h | 4 +-
lib/ts/Makefile.am | 2 +
lib/ts/hugepages.cc | 144 +++++++++++++++++++
lib/ts/hugepages.h | 32 +++++
lib/ts/ink_queue.cc | 21 ++-
lib/ts/libts.h | 1 +
mgmt/RecordsConfig.cc | 2 +
proxy/Main.cc | 7 +
11 files changed, 267 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
----------------------------------------------------------------------
diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
index 694d338..ccced34 100644
--- a/doc/reference/configuration/records.config.en.rst
+++ b/doc/reference/configuration/records.config.en.rst
@@ -2849,6 +2849,18 @@ Sockets
Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
holding at any one time.
+.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
+
+ Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
+
+ You must also enable hugepages at the OS level. In a modern Linux kernel
+ this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
+ sufficiently large value. It is reasonable to use (system
+ memory/hugepage size) because these pages are only created on demand.
+
+ For more information on the implications of enabling huge pages, see
+ `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>`_.
+
.. ts:cv:: CONFIG proxy.config.http.enabled INT 1
Turn on or off support for HTTP proxying. This is rarely used, the one
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index becf713..370c516 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -38,6 +38,8 @@
#include "P_CacheBC.h"
#endif
+#include "hugepages.h"
+
// Compilation Options
#define USELESS_REENABLES // allow them for now
// #define VERIFY_JTEST_DATA
@@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
(double)vol_dirlen(this) / (double)this->len * 100.0);
- raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
+
+ raw_dir = NULL;
+ if (ats_hugepage_enabled())
+ raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
+ if (raw_dir == NULL)
+ raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
+
dir = (Dir *)(raw_dir + vol_headerlen(this));
header = (VolHeaderFooter *)raw_dir;
footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
index 3a7b9c4..e0f20d3 100644
--- a/iocore/cache/CacheDir.cc
+++ b/iocore/cache/CacheDir.cc
@@ -24,6 +24,8 @@
#include "P_Cache.h"
+#include "hugepages.h"
+
// #define LOOP_CHECK_MODE 1
#ifdef LOOP_CHECK_MODE
#define DIR_LOOP_THRESHOLD 1000
@@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
Debug("cache_dir_sync", "sync started");
char *buf = NULL;
size_t buflen = 0;
+ bool buf_huge = false;
EThread *t = (EThread *)0xdeadbeef;
for (int i = 0; i < gnvol; i++) {
@@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
#endif
if (buflen < dirlen) {
- if (buf)
- ats_memalign_free(buf);
- buf = (char *)ats_memalign(ats_pagesize(), dirlen);
+ if (buf) {
+ if (buf_huge)
+ ats_free_hugepage(buf, buflen);
+ else
+ ats_memalign_free(buf);
+ }
buflen = dirlen;
+ if (ats_hugepage_enabled()) {
+ buf = (char *)ats_alloc_hugepage(buflen);
+ buf_huge = true;
+ }
+ if (buf == NULL) {
+ buf = (char *)ats_memalign(ats_pagesize(), buflen);
+ buf_huge = false;
+ }
}
if (!d->dir_sync_in_progress) {
@@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
}
Debug("cache_dir_sync", "sync done");
- if (buf)
- ats_memalign_free(buf);
+ if (buf) {
+ if (buf_huge)
+ ats_free_hugepage(buf, buflen);
+ else
+ ats_memalign_free(buf);
+ buflen = 0;
+ buf = NULL;
+ buf_huge = false;
+ }
}
@@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
Lrestart:
if (vol_idx >= gnvol) {
vol_idx = 0;
- if (buf) {
- ats_memalign_free(buf);
- buf = 0;
- buflen = 0;
- }
Debug("cache_dir_sync", "sync done");
if (event == EVENT_INTERVAL)
trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
@@ -1196,10 +1212,21 @@ Lrestart:
Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
vol->header->dirty = 0;
if (buflen < dirlen) {
- if (buf)
- ats_memalign_free(buf);
- buf = (char *)ats_memalign(ats_pagesize(), dirlen);
+ if (buf) {
+ if (buf_huge)
+ ats_free_hugepage(buf, buflen);
+ else
+ ats_memalign_free(buf);
+ }
buflen = dirlen;
+ if (ats_hugepage_enabled()) {
+ buf = (char *)ats_alloc_hugepage(buflen);
+ buf_huge = true;
+ }
+ if (buf == NULL) {
+ buf = (char *)ats_memalign(ats_pagesize(), buflen);
+ buf_huge = false;
+ }
}
vol->header->sync_serial++;
vol->footer->sync_serial = vol->header->sync_serial;
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
----------------------------------------------------------------------
diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
index 268ecfb..881d6be 100644
--- a/iocore/cache/P_CacheDir.h
+++ b/iocore/cache/P_CacheDir.h
@@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
int vol_idx;
char *buf;
size_t buflen;
+ bool buf_huge;
off_t writepos;
AIOCallbackInternal io;
Event *trigger;
@@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
int mainEvent(int event, Event *e);
void aio_write(int fd, char *b, int n, off_t o);
- CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
+ CacheSync()
+ : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
{
SET_HANDLER(&CacheSync::mainEvent);
}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
----------------------------------------------------------------------
diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
index 7e04222..18c18cf 100644
--- a/lib/ts/Makefile.am
+++ b/lib/ts/Makefile.am
@@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
defalloc.h \
fastlz.c \
fastlz.h \
+ hugepages.cc \
+ hugepages.h \
ink_aiocb.h \
ink_align.h \
ink_apidefs.h \
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
----------------------------------------------------------------------
diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
new file mode 100644
index 0000000..216bf0b
--- /dev/null
+++ b/lib/ts/hugepages.cc
@@ -0,0 +1,144 @@
+/** @file
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+
+#include <cstdio>
+#include <sys/mman.h>
+#include "Diags.h"
+#include "ink_align.h"
+
+#define DEBUG_TAG "hugepages"
+#define MEMINFO_PATH "/proc/meminfo"
+#define LINE_SIZE 256
+#define TOKEN "Hugepagesize:"
+#define TOKEN_SIZE (strlen(TOKEN))
+
+static int hugepage_size = -1;
+static bool hugepage_enabled;
+
+size_t
+ats_hugepage_size(void)
+{
+#ifdef MAP_HUGETLB
+ return hugepage_size;
+#else
+ Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+ return 0;
+#endif
+}
+
+bool
+ats_hugepage_enabled(void)
+{
+#ifdef MAP_HUGETLB
+ return hugepage_enabled;
+#else
+ return false;
+#endif
+}
+
+void
+ats_hugepage_init(int enabled)
+{
+#ifdef MAP_HUGETLB
+ FILE *fp;
+ char line[LINE_SIZE];
+ char *p, *ep;
+
+ hugepage_size = 0;
+
+ if (!enabled) {
+ Debug(DEBUG_TAG, "hugepages not enabled");
+ return;
+ }
+
+ fp = fopen(MEMINFO_PATH, "r");
+
+ if (fp == NULL) {
+ Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
+ return;
+ }
+
+ while (fgets(line, sizeof(line), fp)) {
+ if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
+ p = line + TOKEN_SIZE;
+ while (*p == ' ') {
+ p++;
+ }
+ hugepage_size = strtol(p, &ep, 10);
+ // What other values can this be?
+ if (strncmp(ep, " kB", 4)) {
+ hugepage_size *= 1024;
+ }
+ break;
+ }
+ }
+
+ fclose(fp);
+
+ if (hugepage_size) {
+ hugepage_enabled = true;
+ }
+
+ Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
+#else
+ Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+#endif
+}
+
+void *
+ats_alloc_hugepage(size_t s)
+{
+#ifdef MAP_HUGETLB
+ size_t size;
+ void *mem;
+
+ size = INK_ALIGN(s, ats_hugepage_size());
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (mem == MAP_FAILED) {
+ Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
+ return NULL;
+ }
+
+ return mem;
+#else
+ (void)s;
+ Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+ return NULL;
+#endif
+}
+
+bool
+ats_free_hugepage(void *ptr, size_t s)
+{
+#ifdef MAP_HUGETLB
+ size_t size;
+
+ size = INK_ALIGN(s, ats_hugepage_size());
+ return (munmap(ptr, size) == 0);
+#else
+ (void)ptr;
+ (void)s;
+ Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+ return false;
+#endif
+}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
----------------------------------------------------------------------
diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
new file mode 100644
index 0000000..812542b
--- /dev/null
+++ b/lib/ts/hugepages.h
@@ -0,0 +1,32 @@
+/** @file
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+#ifndef _hugepages_h_
+#define _hugepages_h_
+
+#include <cstring>
+
+size_t ats_hugepage_size(void);
+bool ats_hugepage_enabled(void);
+void ats_hugepage_init(int);
+void *ats_alloc_hugepage(size_t);
+bool ats_free_hugepage(void *, size_t);
+
+#endif
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
----------------------------------------------------------------------
diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
index e718b3f..0f14b68 100644
--- a/lib/ts/ink_queue.cc
+++ b/lib/ts/ink_queue.cc
@@ -50,6 +50,7 @@
#include "ink_assert.h"
#include "ink_queue_ext.h"
#include "ink_align.h"
+#include "hugepages.h"
inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
inkcoreapi volatile int64_t fastalloc_mem_total = 0;
@@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
/* quick test for power of 2 */
ink_assert(!(alignment & (alignment - 1)));
f->alignment = alignment;
- f->chunk_size = chunk_size;
// Make sure we align *all* the objects in the allocation, not just the first one
f->type_size = INK_ALIGN(type_size, alignment);
+ if (ats_hugepage_enabled()) {
+ f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
+ } else {
+ f->chunk_size = chunk_size;
+ }
SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
f->used = 0;
@@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
#ifdef DEBUG
char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
#endif
- if (f->alignment)
- newp = ats_memalign(f->alignment, f->chunk_size * type_size);
- else
- newp = ats_malloc(f->chunk_size * type_size);
+ if (ats_hugepage_enabled())
+ newp = ats_alloc_hugepage(f->chunk_size * type_size);
+
+ if (newp == NULL) {
+ if (f->alignment)
+ newp = ats_memalign(f->alignment, f->chunk_size * type_size);
+ else
+ newp = ats_malloc(f->chunk_size * type_size);
+ }
ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
-
fl_memadd(f->chunk_size * type_size);
#ifdef DEBUG
newsbrk = (char *)sbrk(0);
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
----------------------------------------------------------------------
diff --git a/lib/ts/libts.h b/lib/ts/libts.h
index f136d74..a99e67f 100644
--- a/lib/ts/libts.h
+++ b/lib/ts/libts.h
@@ -41,6 +41,7 @@
#define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
*/
+#include "hugepages.h"
#include "ink_config.h"
#include "ink_platform.h"
#include "ink_align.h"
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
----------------------------------------------------------------------
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index ebcb8fd..ec3387a 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
,
{RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
,
+ {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
+ ,
//############
//#
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
----------------------------------------------------------------------
diff --git a/proxy/Main.cc b/proxy/Main.cc
index 202da33..4684945 100644
--- a/proxy/Main.cc
+++ b/proxy/Main.cc
@@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
// Restart syslog now that we have configuration info
syslog_log_configure();
+ // init huge pages
+ int enabled;
+ REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
+ ats_hugepage_init(enabled);
+ Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
+ Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());
+
if (!num_accept_threads)
REC_ReadConfigInteger(num_accept_threads, "proxy.config.accept_threads");
Re: trafficserver git commit: TS-3122: Add support for hugepages on Linux
Posted by James Peach <jp...@apache.org>.
> On Jun 8, 2015, at 8:40 AM, sorber@apache.org wrote:
>
> Repository: trafficserver
> Updated Branches:
> refs/heads/master 1a0832b3b -> bba557870
>
>
> TS-3122: Add support for hugepages on Linux
>
>
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787
>
> Branch: refs/heads/master
> Commit: bba557870c05222d302a05ec948871cdde8bf63b
> Parents: 1a0832b
> Author: Phil Sorber <so...@apache.org>
> Authored: Thu Oct 16 19:58:08 2014 -0600
> Committer: Phil Sorber <so...@apache.org>
> Committed: Mon Jun 8 09:28:56 2015 -0600
>
> ----------------------------------------------------------------------
> .../configuration/records.config.en.rst | 12 ++
> iocore/cache/Cache.cc | 10 +-
> iocore/cache/CacheDir.cc | 53 +++++--
> iocore/cache/P_CacheDir.h | 4 +-
> lib/ts/Makefile.am | 2 +
> lib/ts/hugepages.cc | 144 +++++++++++++++++++
> lib/ts/hugepages.h | 32 +++++
> lib/ts/ink_queue.cc | 21 ++-
> lib/ts/libts.h | 1 +
> mgmt/RecordsConfig.cc | 2 +
> proxy/Main.cc | 7 +
> 11 files changed, 267 insertions(+), 21 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
> ----------------------------------------------------------------------
> diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
> index 694d338..ccced34 100644
> --- a/doc/reference/configuration/records.config.en.rst
> +++ b/doc/reference/configuration/records.config.en.rst
> @@ -2849,6 +2849,18 @@ Sockets
> Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
> holding at any one time.
>
> +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
> +
> + Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
> +
> + You must also enable hugepages at the OS level. In a modern Linux kernel
> + this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
> + sufficiently large value. It is reasonable to use (system
> + memory/hugepage size) because these pages are only created on demand.
> +
> + For more information on the implications of enabling huge pages, see
> + `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>`_.
> +
> .. ts:cv:: CONFIG proxy.config.http.enabled INT 1
I think it would be very helpful to hear about your experiences with this feature ... how about starting a thread on dev@?
>
> Turn on or off support for HTTP proxying. This is rarely used, the one
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index becf713..370c516 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -38,6 +38,8 @@
> #include "P_CacheBC.h"
> #endif
>
> +#include "hugepages.h"
> +
> // Compilation Options
> #define USELESS_REENABLES // allow them for now
> // #define VERIFY_JTEST_DATA
> @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
>
> Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
> (double)vol_dirlen(this) / (double)this->len * 100.0);
> - raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> + raw_dir = NULL;
> + if (ats_hugepage_enabled())
> + raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
> + if (raw_dir == NULL)
> + raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> dir = (Dir *)(raw_dir + vol_headerlen(this));
> header = (VolHeaderFooter *)raw_dir;
> footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
> index 3a7b9c4..e0f20d3 100644
> --- a/iocore/cache/CacheDir.cc
> +++ b/iocore/cache/CacheDir.cc
> @@ -24,6 +24,8 @@
>
> #include "P_Cache.h"
>
> +#include "hugepages.h"
> +
> // #define LOOP_CHECK_MODE 1
> #ifdef LOOP_CHECK_MODE
> #define DIR_LOOP_THRESHOLD 1000
> @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
> Debug("cache_dir_sync", "sync started");
> char *buf = NULL;
> size_t buflen = 0;
> + bool buf_huge = false;
>
> EThread *t = (EThread *)0xdeadbeef;
> for (int i = 0; i < gnvol; i++) {
> @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
> #endif
>
> if (buflen < dirlen) {
> - if (buf)
> - ats_memalign_free(buf);
> - buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + }
> buflen = dirlen;
> + if (ats_hugepage_enabled()) {
> + buf = (char *)ats_alloc_hugepage(buflen);
> + buf_huge = true;
> + }
> + if (buf == NULL) {
> + buf = (char *)ats_memalign(ats_pagesize(), buflen);
> + buf_huge = false;
> + }
> }
>
> if (!d->dir_sync_in_progress) {
> @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
> Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
> }
> Debug("cache_dir_sync", "sync done");
> - if (buf)
> - ats_memalign_free(buf);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + buflen = 0;
> + buf = NULL;
> + buf_huge = false;
> + }
> }
>
>
> @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
> Lrestart:
> if (vol_idx >= gnvol) {
> vol_idx = 0;
> - if (buf) {
> - ats_memalign_free(buf);
> - buf = 0;
> - buflen = 0;
> - }
> Debug("cache_dir_sync", "sync done");
> if (event == EVENT_INTERVAL)
> trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
> @@ -1196,10 +1212,21 @@ Lrestart:
> Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
> vol->header->dirty = 0;
> if (buflen < dirlen) {
> - if (buf)
> - ats_memalign_free(buf);
> - buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + }
> buflen = dirlen;
> + if (ats_hugepage_enabled()) {
> + buf = (char *)ats_alloc_hugepage(buflen);
> + buf_huge = true;
> + }
> + if (buf == NULL) {
> + buf = (char *)ats_memalign(ats_pagesize(), buflen);
> + buf_huge = false;
> + }
> }
> vol->header->sync_serial++;
> vol->footer->sync_serial = vol->header->sync_serial;
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
> index 268ecfb..881d6be 100644
> --- a/iocore/cache/P_CacheDir.h
> +++ b/iocore/cache/P_CacheDir.h
> @@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
> int vol_idx;
> char *buf;
> size_t buflen;
> + bool buf_huge;
> off_t writepos;
> AIOCallbackInternal io;
> Event *trigger;
> @@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
> int mainEvent(int event, Event *e);
> void aio_write(int fd, char *b, int n, off_t o);
>
> - CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
> + CacheSync()
> + : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
> {
> SET_HANDLER(&CacheSync::mainEvent);
> }
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
> ----------------------------------------------------------------------
> diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
> index 7e04222..18c18cf 100644
> --- a/lib/ts/Makefile.am
> +++ b/lib/ts/Makefile.am
> @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
> defalloc.h \
> fastlz.c \
> fastlz.h \
> + hugepages.cc \
> + hugepages.h \
> ink_aiocb.h \
> ink_align.h \
> ink_apidefs.h \
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
> new file mode 100644
> index 0000000..216bf0b
> --- /dev/null
> +++ b/lib/ts/hugepages.cc
> @@ -0,0 +1,144 @@
> +/** @file
> +
> + @section license License
> +
> + Licensed to the Apache Software Foundation (ASF) under one
> + or more contributor license agreements. See the NOTICE file
> + distributed with this work for additional information
> + regarding copyright ownership. The ASF licenses this file
> + to you under the Apache License, Version 2.0 (the
> + "License"); you may not use this file except in compliance
> + with the License. You may obtain a copy of the License at
> +
> + http://www.apache.org/licenses/LICENSE-2.0
> +
> + Unless required by applicable law or agreed to in writing, software
> + distributed under the License is distributed on an "AS IS" BASIS,
> + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + See the License for the specific language governing permissions and
> + limitations under the License.
> + */
> +
> +#include <cstdio>
> +#include <sys/mman.h>
> +#include "Diags.h"
> +#include "ink_align.h"
> +
> +#define DEBUG_TAG "hugepages"
> +#define MEMINFO_PATH "/proc/meminfo"
> +#define LINE_SIZE 256
> +#define TOKEN "Hugepagesize:"
> +#define TOKEN_SIZE (strlen(TOKEN))
> +
> +static int hugepage_size = -1;
> +static bool hugepage_enabled;
> +
> +size_t
> +ats_hugepage_size(void)
> +{
> +#ifdef MAP_HUGETLB
> + return hugepage_size;
> +#else
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return 0;
> +#endif
> +}
> +
> +bool
> +ats_hugepage_enabled(void)
> +{
> +#ifdef MAP_HUGETLB
> + return hugepage_enabled;
> +#else
> + return false;
> +#endif
> +}
> +
> +void
> +ats_hugepage_init(int enabled)
> +{
> +#ifdef MAP_HUGETLB
> + FILE *fp;
> + char line[LINE_SIZE];
> + char *p, *ep;
> +
> + hugepage_size = 0;
> +
> + if (!enabled) {
> + Debug(DEBUG_TAG, "hugepages not enabled");
> + return;
> + }
> +
> + fp = fopen(MEMINFO_PATH, "r");
> +
> + if (fp == NULL) {
> + Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
> + return;
> + }
> +
> + while (fgets(line, sizeof(line), fp)) {
> + if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
> + p = line + TOKEN_SIZE;
> + while (*p == ' ') {
> + p++;
> + }
> + hugepage_size = strtol(p, &ep, 10);
> + // What other values can this be?
> + if (strncmp(ep, " kB", 4)) {
> + hugepage_size *= 1024;
> + }
> + break;
> + }
> + }
> +
> + fclose(fp);
> +
> + if (hugepage_size) {
> + hugepage_enabled = true;
> + }
> +
> + Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
> +#else
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +#endif
> +}
> +
> +void *
> +ats_alloc_hugepage(size_t s)
> +{
> +#ifdef MAP_HUGETLB
> + size_t size;
> + void *mem;
> +
> + size = INK_ALIGN(s, ats_hugepage_size());
> +
> + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
> +
> + if (mem == MAP_FAILED) {
> + Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
> + return NULL;
> + }
> +
> + return mem;
> +#else
> + (void)s;
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return NULL;
> +#endif
> +}
> +
> +bool
> +ats_free_hugepage(void *ptr, size_t s)
> +{
> +#ifdef MAP_HUGETLB
> + size_t size;
> +
> + size = INK_ALIGN(s, ats_hugepage_size());
> + return (munmap(ptr, size) == 0);
> +#else
> + (void)ptr;
> + (void)s;
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return false;
> +#endif
> +}
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
> new file mode 100644
> index 0000000..812542b
> --- /dev/null
> +++ b/lib/ts/hugepages.h
> @@ -0,0 +1,32 @@
> +/** @file
> +
> + @section license License
> +
> + Licensed to the Apache Software Foundation (ASF) under one
> + or more contributor license agreements. See the NOTICE file
> + distributed with this work for additional information
> + regarding copyright ownership. The ASF licenses this file
> + to you under the Apache License, Version 2.0 (the
> + "License"); you may not use this file except in compliance
> + with the License. You may obtain a copy of the License at
> +
> + http://www.apache.org/licenses/LICENSE-2.0
> +
> + Unless required by applicable law or agreed to in writing, software
> + distributed under the License is distributed on an "AS IS" BASIS,
> + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + See the License for the specific language governing permissions and
> + limitations under the License.
> + */
> +#ifndef _hugepages_h_
> +#define _hugepages_h_
> +
> +#include <cstring>
Why use the c* versions of these headers? We don't do that anywhere else, and you are not actually using the names from the std namespace in this patch either ...
> +
> +size_t ats_hugepage_size(void);
> +bool ats_hugepage_enabled(void);
> +void ats_hugepage_init(int);
> +void *ats_alloc_hugepage(size_t);
> +bool ats_free_hugepage(void *, size_t);
> +
> +#endif
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
> index e718b3f..0f14b68 100644
> --- a/lib/ts/ink_queue.cc
> +++ b/lib/ts/ink_queue.cc
> @@ -50,6 +50,7 @@
> #include "ink_assert.h"
> #include "ink_queue_ext.h"
> #include "ink_align.h"
> +#include "hugepages.h"
>
> inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
> inkcoreapi volatile int64_t fastalloc_mem_total = 0;
> @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
> /* quick test for power of 2 */
> ink_assert(!(alignment & (alignment - 1)));
> f->alignment = alignment;
> - f->chunk_size = chunk_size;
> // Make sure we align *all* the objects in the allocation, not just the first one
> f->type_size = INK_ALIGN(type_size, alignment);
> + if (ats_hugepage_enabled()) {
> + f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
> + } else {
> + f->chunk_size = chunk_size;
> + }
> SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
>
> f->used = 0;
> @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
> #ifdef DEBUG
> char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
> #endif
> - if (f->alignment)
> - newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> - else
> - newp = ats_malloc(f->chunk_size * type_size);
> + if (ats_hugepage_enabled())
> + newp = ats_alloc_hugepage(f->chunk_size * type_size);
> +
> + if (newp == NULL) {
> + if (f->alignment)
> + newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> + else
> + newp = ats_malloc(f->chunk_size * type_size);
> + }
> ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
> -
> fl_memadd(f->chunk_size * type_size);
> #ifdef DEBUG
> newsbrk = (char *)sbrk(0);
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index f136d74..a99e67f 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -41,6 +41,7 @@
> #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
> */
>
> +#include "hugepages.h"
> #include "ink_config.h"
> #include "ink_platform.h"
> #include "ink_align.h"
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
> ----------------------------------------------------------------------
> diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
> index ebcb8fd..ec3387a 100644
> --- a/mgmt/RecordsConfig.cc
> +++ b/mgmt/RecordsConfig.cc
> @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
> ,
> {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
> ,
> + {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
> + ,
I would expect bad things to happen if you change this setting at runtime, so the RECU value should be RECU_RESTART_TS.
>
> //############
> //#
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
> ----------------------------------------------------------------------
> diff --git a/proxy/Main.cc b/proxy/Main.cc
> index 202da33..4684945 100644
> --- a/proxy/Main.cc
> +++ b/proxy/Main.cc
> @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
> // Restart syslog now that we have configuration info
> syslog_log_configure();
>
> + // init huge pages
> + int enabled;
> + REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
> + ats_hugepage_init(enabled);
> + Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
> + Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());
Could you imagine there being a need to set this separately for the RAM cache and for the allocators?
J
Re: trafficserver git commit: TS-3122: Add support for hugepages on Linux
Posted by James Peach <jp...@apache.org>.
> On Jun 8, 2015, at 8:40 AM, sorber@apache.org wrote:
>
> Repository: trafficserver
> Updated Branches:
> refs/heads/master 1a0832b3b -> bba557870
>
>
> TS-3122: Add support for hugepages on Linux
>
>
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787
>
> Branch: refs/heads/master
> Commit: bba557870c05222d302a05ec948871cdde8bf63b
> Parents: 1a0832b
> Author: Phil Sorber <so...@apache.org>
> Authored: Thu Oct 16 19:58:08 2014 -0600
> Committer: Phil Sorber <so...@apache.org>
> Committed: Mon Jun 8 09:28:56 2015 -0600
>
> ----------------------------------------------------------------------
> .../configuration/records.config.en.rst | 12 ++
> iocore/cache/Cache.cc | 10 +-
> iocore/cache/CacheDir.cc | 53 +++++--
> iocore/cache/P_CacheDir.h | 4 +-
> lib/ts/Makefile.am | 2 +
> lib/ts/hugepages.cc | 144 +++++++++++++++++++
> lib/ts/hugepages.h | 32 +++++
> lib/ts/ink_queue.cc | 21 ++-
> lib/ts/libts.h | 1 +
> mgmt/RecordsConfig.cc | 2 +
> proxy/Main.cc | 7 +
> 11 files changed, 267 insertions(+), 21 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
> ----------------------------------------------------------------------
> diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
> index 694d338..ccced34 100644
> --- a/doc/reference/configuration/records.config.en.rst
> +++ b/doc/reference/configuration/records.config.en.rst
> @@ -2849,6 +2849,18 @@ Sockets
> Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
> holding at any one time.
>
> +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
> +
> + Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
> +
> + You must also enable hugepages at the OS level. In a modern linux Kernel
> + this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
> + sufficiently large value. It is reasonable to use (system
> + memory/hugepage size) because these pages are only created on demand.
> +
> + For more information on the implications of enabling huge pages, see
> + `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>_`.
> +
> .. ts:cv:: CONFIG proxy.config.http.enabled INT 1
I think it would be very helpful to hear about your experiences with this feature ... how about starting a thread on dev@?
>
> Turn on or off support for HTTP proxying. This is rarely used, the one
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index becf713..370c516 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -38,6 +38,8 @@
> #include "P_CacheBC.h"
> #endif
>
> +#include "hugepages.h"
> +
> // Compilation Options
> #define USELESS_REENABLES // allow them for now
> // #define VERIFY_JTEST_DATA
> @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
>
> Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
> (double)vol_dirlen(this) / (double)this->len * 100.0);
> - raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> + raw_dir = NULL;
> + if (ats_hugepage_enabled())
> + raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
> + if (raw_dir == NULL)
> + raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> dir = (Dir *)(raw_dir + vol_headerlen(this));
> header = (VolHeaderFooter *)raw_dir;
> footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
> index 3a7b9c4..e0f20d3 100644
> --- a/iocore/cache/CacheDir.cc
> +++ b/iocore/cache/CacheDir.cc
> @@ -24,6 +24,8 @@
>
> #include "P_Cache.h"
>
> +#include "hugepages.h"
> +
> // #define LOOP_CHECK_MODE 1
> #ifdef LOOP_CHECK_MODE
> #define DIR_LOOP_THRESHOLD 1000
> @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
> Debug("cache_dir_sync", "sync started");
> char *buf = NULL;
> size_t buflen = 0;
> + bool buf_huge = false;
>
> EThread *t = (EThread *)0xdeadbeef;
> for (int i = 0; i < gnvol; i++) {
> @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
> #endif
>
> if (buflen < dirlen) {
> - if (buf)
> - ats_memalign_free(buf);
> - buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + }
> buflen = dirlen;
> + if (ats_hugepage_enabled()) {
> + buf = (char *)ats_alloc_hugepage(buflen);
> + buf_huge = true;
> + }
> + if (buf == NULL) {
> + buf = (char *)ats_memalign(ats_pagesize(), buflen);
> + buf_huge = false;
> + }
> }
>
> if (!d->dir_sync_in_progress) {
> @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
> Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
> }
> Debug("cache_dir_sync", "sync done");
> - if (buf)
> - ats_memalign_free(buf);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + buflen = 0;
> + buf = NULL;
> + buf_huge = false;
> + }
> }
>
>
> @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
> Lrestart:
> if (vol_idx >= gnvol) {
> vol_idx = 0;
> - if (buf) {
> - ats_memalign_free(buf);
> - buf = 0;
> - buflen = 0;
> - }
> Debug("cache_dir_sync", "sync done");
> if (event == EVENT_INTERVAL)
> trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
> @@ -1196,10 +1212,21 @@ Lrestart:
> Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
> vol->header->dirty = 0;
> if (buflen < dirlen) {
> - if (buf)
> - ats_memalign_free(buf);
> - buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> + if (buf) {
> + if (buf_huge)
> + ats_free_hugepage(buf, buflen);
> + else
> + ats_memalign_free(buf);
> + }
> buflen = dirlen;
> + if (ats_hugepage_enabled()) {
> + buf = (char *)ats_alloc_hugepage(buflen);
> + buf_huge = true;
> + }
> + if (buf == NULL) {
> + buf = (char *)ats_memalign(ats_pagesize(), buflen);
> + buf_huge = false;
> + }
> }
> vol->header->sync_serial++;
> vol->footer->sync_serial = vol->header->sync_serial;
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
> index 268ecfb..881d6be 100644
> --- a/iocore/cache/P_CacheDir.h
> +++ b/iocore/cache/P_CacheDir.h
> @@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
> int vol_idx;
> char *buf;
> size_t buflen;
> + bool buf_huge;
> off_t writepos;
> AIOCallbackInternal io;
> Event *trigger;
> @@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
> int mainEvent(int event, Event *e);
> void aio_write(int fd, char *b, int n, off_t o);
>
> - CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
> + CacheSync()
> + : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
> {
> SET_HANDLER(&CacheSync::mainEvent);
> }
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
> ----------------------------------------------------------------------
> diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
> index 7e04222..18c18cf 100644
> --- a/lib/ts/Makefile.am
> +++ b/lib/ts/Makefile.am
> @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
> defalloc.h \
> fastlz.c \
> fastlz.h \
> + hugepages.cc \
> + hugepages.h \
> ink_aiocb.h \
> ink_align.h \
> ink_apidefs.h \
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
> new file mode 100644
> index 0000000..216bf0b
> --- /dev/null
> +++ b/lib/ts/hugepages.cc
> @@ -0,0 +1,144 @@
> +/** @file
> +
> + @section license License
> +
> + Licensed to the Apache Software Foundation (ASF) under one
> + or more contributor license agreements. See the NOTICE file
> + distributed with this work for additional information
> + regarding copyright ownership. The ASF licenses this file
> + to you under the Apache License, Version 2.0 (the
> + "License"); you may not use this file except in compliance
> + with the License. You may obtain a copy of the License at
> +
> + http://www.apache.org/licenses/LICENSE-2.0
> +
> + Unless required by applicable law or agreed to in writing, software
> + distributed under the License is distributed on an "AS IS" BASIS,
> + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + See the License for the specific language governing permissions and
> + limitations under the License.
> + */
> +
> +#include <cstdio>
> +#include <sys/mman.h>
> +#include "Diags.h"
> +#include "ink_align.h"
> +
> +#define DEBUG_TAG "hugepages"
> +#define MEMINFO_PATH "/proc/meminfo"
> +#define LINE_SIZE 256
> +#define TOKEN "Hugepagesize:"
> +#define TOKEN_SIZE (strlen(TOKEN))
> +
> +static int hugepage_size = -1;
> +static bool hugepage_enabled;
> +
> +size_t
> +ats_hugepage_size(void)
> +{
> +#ifdef MAP_HUGETLB
> + return hugepage_size;
> +#else
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return 0;
> +#endif
> +}
> +
> +bool
> +ats_hugepage_enabled(void)
> +{
> +#ifdef MAP_HUGETLB
> + return hugepage_enabled;
> +#else
> + return false;
> +#endif
> +}
> +
> +void
> +ats_hugepage_init(int enabled)
> +{
> +#ifdef MAP_HUGETLB
> + FILE *fp;
> + char line[LINE_SIZE];
> + char *p, *ep;
> +
> + hugepage_size = 0;
> +
> + if (!enabled) {
> + Debug(DEBUG_TAG, "hugepages not enabled");
> + return;
> + }
> +
> + fp = fopen(MEMINFO_PATH, "r");
> +
> + if (fp == NULL) {
> + Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
> + return;
> + }
> +
> + while (fgets(line, sizeof(line), fp)) {
> + if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
> + p = line + TOKEN_SIZE;
> + while (*p == ' ') {
> + p++;
> + }
> + hugepage_size = strtol(p, &ep, 10);
> + // What other values can this be?
> + if (strncmp(ep, " kB", 4)) {
> + hugepage_size *= 1024;
> + }
> + break;
> + }
> + }
> +
> + fclose(fp);
> +
> + if (hugepage_size) {
> + hugepage_enabled = true;
> + }
> +
> + Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
> +#else
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +#endif
> +}
> +
> +void *
> +ats_alloc_hugepage(size_t s)
> +{
> +#ifdef MAP_HUGETLB
> + size_t size;
> + void *mem;
> +
> + size = INK_ALIGN(s, ats_hugepage_size());
> +
> + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
> +
> + if (mem == MAP_FAILED) {
> + Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
> + return NULL;
> + }
> +
> + return mem;
> +#else
> + (void)s;
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return NULL;
> +#endif
> +}
> +
> +bool
> +ats_free_hugepage(void *ptr, size_t s)
> +{
> +#ifdef MAP_HUGETLB
> + size_t size;
> +
> + size = INK_ALIGN(s, ats_hugepage_size());
> + return (munmap(ptr, size) == 0);
> +#else
> + (void)ptr;
> + (void)s;
> + Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> + return false;
> +#endif
> +}
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
> new file mode 100644
> index 0000000..812542b
> --- /dev/null
> +++ b/lib/ts/hugepages.h
> @@ -0,0 +1,32 @@
> +/** @file
> +
> + @section license License
> +
> + Licensed to the Apache Software Foundation (ASF) under one
> + or more contributor license agreements. See the NOTICE file
> + distributed with this work for additional information
> + regarding copyright ownership. The ASF licenses this file
> + to you under the Apache License, Version 2.0 (the
> + "License"); you may not use this file except in compliance
> + with the License. You may obtain a copy of the License at
> +
> + http://www.apache.org/licenses/LICENSE-2.0
> +
> + Unless required by applicable law or agreed to in writing, software
> + distributed under the License is distributed on an "AS IS" BASIS,
> + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + See the License for the specific language governing permissions and
> + limitations under the License.
> + */
> +#ifndef _hugepages_h_
> +#define _hugepages_h_
> +
> +#include <cstring>
Why use the c* versions of these headers? We don't do that anywhere else, and you are not actually using the names from the std namespace in this patch either ...
> +
> +size_t ats_hugepage_size(void);
> +bool ats_hugepage_enabled(void);
> +void ats_hugepage_init(int);
> +void *ats_alloc_hugepage(size_t);
> +bool ats_free_hugepage(void *, size_t);
> +
> +#endif
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
> index e718b3f..0f14b68 100644
> --- a/lib/ts/ink_queue.cc
> +++ b/lib/ts/ink_queue.cc
> @@ -50,6 +50,7 @@
> #include "ink_assert.h"
> #include "ink_queue_ext.h"
> #include "ink_align.h"
> +#include "hugepages.h"
>
> inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
> inkcoreapi volatile int64_t fastalloc_mem_total = 0;
> @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
> /* quick test for power of 2 */
> ink_assert(!(alignment & (alignment - 1)));
> f->alignment = alignment;
> - f->chunk_size = chunk_size;
> // Make sure we align *all* the objects in the allocation, not just the first one
> f->type_size = INK_ALIGN(type_size, alignment);
> + if (ats_hugepage_enabled()) {
> + f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
> + } else {
> + f->chunk_size = chunk_size;
> + }
> SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
>
> f->used = 0;
> @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
> #ifdef DEBUG
> char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
> #endif
> - if (f->alignment)
> - newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> - else
> - newp = ats_malloc(f->chunk_size * type_size);
> + if (ats_hugepage_enabled())
> + newp = ats_alloc_hugepage(f->chunk_size * type_size);
> +
> + if (newp == NULL) {
> + if (f->alignment)
> + newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> + else
> + newp = ats_malloc(f->chunk_size * type_size);
> + }
> ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
> -
> fl_memadd(f->chunk_size * type_size);
> #ifdef DEBUG
> newsbrk = (char *)sbrk(0);
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index f136d74..a99e67f 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -41,6 +41,7 @@
> #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
> */
>
> +#include "hugepages.h"
> #include "ink_config.h"
> #include "ink_platform.h"
> #include "ink_align.h"
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
> ----------------------------------------------------------------------
> diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
> index ebcb8fd..ec3387a 100644
> --- a/mgmt/RecordsConfig.cc
> +++ b/mgmt/RecordsConfig.cc
> @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
> ,
> {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
> ,
> + {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
> + ,
I would expect bad things to happen if you change this setting at runtime, so the RECU value should be RECU_RESTART_TS.
>
> //############
> //#
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
> ----------------------------------------------------------------------
> diff --git a/proxy/Main.cc b/proxy/Main.cc
> index 202da33..4684945 100644
> --- a/proxy/Main.cc
> +++ b/proxy/Main.cc
> @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
> // Restart syslog now that we have configuration info
> syslog_log_configure();
>
> + // init huge pages
> + int enabled;
> + REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
> + ats_hugepage_init(enabled);
> + Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
> + Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());
Could you imagine there being a need to set this separately for the RAM cache and for the allocators?
J