You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by so...@apache.org on 2015/06/08 17:40:23 UTC

trafficserver git commit: TS-3122: Add support for hugepages on Linux

Repository: trafficserver
Updated Branches:
  refs/heads/master 1a0832b3b -> bba557870


TS-3122: Add support for hugepages on Linux


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787

Branch: refs/heads/master
Commit: bba557870c05222d302a05ec948871cdde8bf63b
Parents: 1a0832b
Author: Phil Sorber <so...@apache.org>
Authored: Thu Oct 16 19:58:08 2014 -0600
Committer: Phil Sorber <so...@apache.org>
Committed: Mon Jun 8 09:28:56 2015 -0600

----------------------------------------------------------------------
 .../configuration/records.config.en.rst         |  12 ++
 iocore/cache/Cache.cc                           |  10 +-
 iocore/cache/CacheDir.cc                        |  53 +++++--
 iocore/cache/P_CacheDir.h                       |   4 +-
 lib/ts/Makefile.am                              |   2 +
 lib/ts/hugepages.cc                             | 144 +++++++++++++++++++
 lib/ts/hugepages.h                              |  32 +++++
 lib/ts/ink_queue.cc                             |  21 ++-
 lib/ts/libts.h                                  |   1 +
 mgmt/RecordsConfig.cc                           |   2 +
 proxy/Main.cc                                   |   7 +
 11 files changed, 267 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
----------------------------------------------------------------------
diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
index 694d338..ccced34 100644
--- a/doc/reference/configuration/records.config.en.rst
+++ b/doc/reference/configuration/records.config.en.rst
@@ -2849,6 +2849,18 @@ Sockets
    Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
    holding at any one time.
 
+.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
+
+   Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
+
+   You must also enable hugepages at the OS level. In a modern linux Kernel
+   this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
+   sufficiently large value. It is reasonable to use (system
+   memory/hugepage size) because these pages are only created on demand.
+
+   For more information on the implications of enabling huge pages, see
+   `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>_`.
+
 .. ts:cv:: CONFIG proxy.config.http.enabled INT 1
 
    Turn on or off support for HTTP proxying. This is rarely used, the one

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
index becf713..370c516 100644
--- a/iocore/cache/Cache.cc
+++ b/iocore/cache/Cache.cc
@@ -38,6 +38,8 @@
 #include "P_CacheBC.h"
 #endif
 
+#include "hugepages.h"
+
 // Compilation Options
 #define USELESS_REENABLES // allow them for now
 // #define VERIFY_JTEST_DATA
@@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
 
   Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
         (double)vol_dirlen(this) / (double)this->len * 100.0);
-  raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
+
+  raw_dir = NULL;
+  if (ats_hugepage_enabled())
+    raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
+  if (raw_dir == NULL)
+    raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
+
   dir = (Dir *)(raw_dir + vol_headerlen(this));
   header = (VolHeaderFooter *)raw_dir;
   footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
----------------------------------------------------------------------
diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
index 3a7b9c4..e0f20d3 100644
--- a/iocore/cache/CacheDir.cc
+++ b/iocore/cache/CacheDir.cc
@@ -24,6 +24,8 @@
 
 #include "P_Cache.h"
 
+#include "hugepages.h"
+
 // #define LOOP_CHECK_MODE 1
 #ifdef LOOP_CHECK_MODE
 #define DIR_LOOP_THRESHOLD 1000
@@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
   Debug("cache_dir_sync", "sync started");
   char *buf = NULL;
   size_t buflen = 0;
+  bool buf_huge = false;
 
   EThread *t = (EThread *)0xdeadbeef;
   for (int i = 0; i < gnvol; i++) {
@@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
 #endif
 
     if (buflen < dirlen) {
-      if (buf)
-        ats_memalign_free(buf);
-      buf = (char *)ats_memalign(ats_pagesize(), dirlen);
+      if (buf) {
+        if (buf_huge)
+          ats_free_hugepage(buf, buflen);
+        else
+          ats_memalign_free(buf);
+      }
       buflen = dirlen;
+      if (ats_hugepage_enabled()) {
+        buf = (char *)ats_alloc_hugepage(buflen);
+        buf_huge = true;
+      }
+      if (buf == NULL) {
+        buf = (char *)ats_memalign(ats_pagesize(), buflen);
+        buf_huge = false;
+      }
     }
 
     if (!d->dir_sync_in_progress) {
@@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
     Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
   }
   Debug("cache_dir_sync", "sync done");
-  if (buf)
-    ats_memalign_free(buf);
+  if (buf) {
+    if (buf_huge)
+      ats_free_hugepage(buf, buflen);
+    else
+      ats_memalign_free(buf);
+    buflen = 0;
+    buf = NULL;
+    buf_huge = false;
+  }
 }
 
 
@@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
 Lrestart:
   if (vol_idx >= gnvol) {
     vol_idx = 0;
-    if (buf) {
-      ats_memalign_free(buf);
-      buf = 0;
-      buflen = 0;
-    }
     Debug("cache_dir_sync", "sync done");
     if (event == EVENT_INTERVAL)
       trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
@@ -1196,10 +1212,21 @@ Lrestart:
       Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
       vol->header->dirty = 0;
       if (buflen < dirlen) {
-        if (buf)
-          ats_memalign_free(buf);
-        buf = (char *)ats_memalign(ats_pagesize(), dirlen);
+        if (buf) {
+          if (buf_huge)
+            ats_free_hugepage(buf, buflen);
+          else
+            ats_memalign_free(buf);
+        }
         buflen = dirlen;
+        if (ats_hugepage_enabled()) {
+          buf = (char *)ats_alloc_hugepage(buflen);
+          buf_huge = true;
+        }
+        if (buf == NULL) {
+          buf = (char *)ats_memalign(ats_pagesize(), buflen);
+          buf_huge = false;
+        }
       }
       vol->header->sync_serial++;
       vol->footer->sync_serial = vol->header->sync_serial;

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
----------------------------------------------------------------------
diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
index 268ecfb..881d6be 100644
--- a/iocore/cache/P_CacheDir.h
+++ b/iocore/cache/P_CacheDir.h
@@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
   int vol_idx;
   char *buf;
   size_t buflen;
+  bool buf_huge;
   off_t writepos;
   AIOCallbackInternal io;
   Event *trigger;
@@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
   int mainEvent(int event, Event *e);
   void aio_write(int fd, char *b, int n, off_t o);
 
-  CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
+  CacheSync()
+    : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
   {
     SET_HANDLER(&CacheSync::mainEvent);
   }

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
----------------------------------------------------------------------
diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
index 7e04222..18c18cf 100644
--- a/lib/ts/Makefile.am
+++ b/lib/ts/Makefile.am
@@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
   defalloc.h \
   fastlz.c \
   fastlz.h \
+  hugepages.cc \
+  hugepages.h \
   ink_aiocb.h \
   ink_align.h \
   ink_apidefs.h \

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
----------------------------------------------------------------------
diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
new file mode 100644
index 0000000..216bf0b
--- /dev/null
+++ b/lib/ts/hugepages.cc
@@ -0,0 +1,144 @@
+/** @file
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include <cstdio>
+#include <sys/mman.h>
+#include "Diags.h"
+#include "ink_align.h"
+
+#define DEBUG_TAG "hugepages"
+#define MEMINFO_PATH "/proc/meminfo"
+#define LINE_SIZE 256
+#define TOKEN "Hugepagesize:"
+#define TOKEN_SIZE (strlen(TOKEN))
+
+static int hugepage_size = -1;
+static bool hugepage_enabled;
+
+size_t
+ats_hugepage_size(void)
+{
+#ifdef MAP_HUGETLB
+  return hugepage_size;
+#else
+  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+  return 0;
+#endif
+}
+
+bool
+ats_hugepage_enabled(void)
+{
+#ifdef MAP_HUGETLB
+  return hugepage_enabled;
+#else
+  return false;
+#endif
+}
+
+void
+ats_hugepage_init(int enabled)
+{
+#ifdef MAP_HUGETLB
+  FILE *fp;
+  char line[LINE_SIZE];
+  char *p, *ep;
+
+  hugepage_size = 0;
+
+  if (!enabled) {
+    Debug(DEBUG_TAG, "hugepages not enabled");
+    return;
+  }
+
+  fp = fopen(MEMINFO_PATH, "r");
+
+  if (fp == NULL) {
+    Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
+    return;
+  }
+
+  while (fgets(line, sizeof(line), fp)) {
+    if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
+      p = line + TOKEN_SIZE;
+      while (*p == ' ') {
+        p++;
+      }
+      hugepage_size = strtol(p, &ep, 10);
+      // What other values can this be?
+      if (strncmp(ep, " kB", 4)) {
+        hugepage_size *= 1024;
+      }
+      break;
+    }
+  }
+
+  fclose(fp);
+
+  if (hugepage_size) {
+    hugepage_enabled = true;
+  }
+
+  Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
+#else
+  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+#endif
+}
+
+void *
+ats_alloc_hugepage(size_t s)
+{
+#ifdef MAP_HUGETLB
+  size_t size;
+  void *mem;
+
+  size = INK_ALIGN(s, ats_hugepage_size());
+
+  mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+  if (mem == MAP_FAILED) {
+    Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
+    return NULL;
+  }
+
+  return mem;
+#else
+  (void)s;
+  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+  return NULL;
+#endif
+}
+
+bool
+ats_free_hugepage(void *ptr, size_t s)
+{
+#ifdef MAP_HUGETLB
+  size_t size;
+
+  size = INK_ALIGN(s, ats_hugepage_size());
+  return (munmap(ptr, size) == 0);
+#else
+  (void)ptr;
+  (void)s;
+  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
+  return false;
+#endif
+}

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
----------------------------------------------------------------------
diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
new file mode 100644
index 0000000..812542b
--- /dev/null
+++ b/lib/ts/hugepages.h
@@ -0,0 +1,32 @@
+/** @file
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+#ifndef _hugepages_h_
+#define _hugepages_h_
+
+#include <cstring>
+
+size_t ats_hugepage_size(void);
+bool ats_hugepage_enabled(void);
+void ats_hugepage_init(int);
+void *ats_alloc_hugepage(size_t);
+bool ats_free_hugepage(void *, size_t);
+
+#endif

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
----------------------------------------------------------------------
diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
index e718b3f..0f14b68 100644
--- a/lib/ts/ink_queue.cc
+++ b/lib/ts/ink_queue.cc
@@ -50,6 +50,7 @@
 #include "ink_assert.h"
 #include "ink_queue_ext.h"
 #include "ink_align.h"
+#include "hugepages.h"
 
 inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
 inkcoreapi volatile int64_t fastalloc_mem_total = 0;
@@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
   /* quick test for power of 2 */
   ink_assert(!(alignment & (alignment - 1)));
   f->alignment = alignment;
-  f->chunk_size = chunk_size;
   // Make sure we align *all* the objects in the allocation, not just the first one
   f->type_size = INK_ALIGN(type_size, alignment);
+  if (ats_hugepage_enabled()) {
+    f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
+  } else {
+    f->chunk_size = chunk_size;
+  }
   SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
 
   f->used = 0;
@@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
 #ifdef DEBUG
       char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
 #endif
-      if (f->alignment)
-        newp = ats_memalign(f->alignment, f->chunk_size * type_size);
-      else
-        newp = ats_malloc(f->chunk_size * type_size);
+      if (ats_hugepage_enabled())
+        newp = ats_alloc_hugepage(f->chunk_size * type_size);
+
+      if (newp == NULL) {
+        if (f->alignment)
+          newp = ats_memalign(f->alignment, f->chunk_size * type_size);
+        else
+          newp = ats_malloc(f->chunk_size * type_size);
+      }
       ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
-
       fl_memadd(f->chunk_size * type_size);
 #ifdef DEBUG
       newsbrk = (char *)sbrk(0);

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
----------------------------------------------------------------------
diff --git a/lib/ts/libts.h b/lib/ts/libts.h
index f136d74..a99e67f 100644
--- a/lib/ts/libts.h
+++ b/lib/ts/libts.h
@@ -41,6 +41,7 @@
 #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
 */
 
+#include "hugepages.h"
 #include "ink_config.h"
 #include "ink_platform.h"
 #include "ink_align.h"

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
----------------------------------------------------------------------
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index ebcb8fd..ec3387a 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
   ,
   {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
   ,
+  {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
+  ,
 
   //############
   //#

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
----------------------------------------------------------------------
diff --git a/proxy/Main.cc b/proxy/Main.cc
index 202da33..4684945 100644
--- a/proxy/Main.cc
+++ b/proxy/Main.cc
@@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
   // Restart syslog now that we have configuration info
   syslog_log_configure();
 
+  // init huge pages
+  int enabled;
+  REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
+  ats_hugepage_init(enabled);
+  Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
+  Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());
+
   if (!num_accept_threads)
     REC_ReadConfigInteger(num_accept_threads, "proxy.config.accept_threads");
 


Re: trafficserver git commit: TS-3122: Add support for hugepages on Linux

Posted by James Peach <jp...@apache.org>.
> On Jun 8, 2015, at 8:40 AM, sorber@apache.org wrote:
> 
> Repository: trafficserver
> Updated Branches:
>  refs/heads/master 1a0832b3b -> bba557870
> 
> 
> TS-3122: Add support for hugepages on Linux
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787
> 
> Branch: refs/heads/master
> Commit: bba557870c05222d302a05ec948871cdde8bf63b
> Parents: 1a0832b
> Author: Phil Sorber <so...@apache.org>
> Authored: Thu Oct 16 19:58:08 2014 -0600
> Committer: Phil Sorber <so...@apache.org>
> Committed: Mon Jun 8 09:28:56 2015 -0600
> 
> ----------------------------------------------------------------------
> .../configuration/records.config.en.rst         |  12 ++
> iocore/cache/Cache.cc                           |  10 +-
> iocore/cache/CacheDir.cc                        |  53 +++++--
> iocore/cache/P_CacheDir.h                       |   4 +-
> lib/ts/Makefile.am                              |   2 +
> lib/ts/hugepages.cc                             | 144 +++++++++++++++++++
> lib/ts/hugepages.h                              |  32 +++++
> lib/ts/ink_queue.cc                             |  21 ++-
> lib/ts/libts.h                                  |   1 +
> mgmt/RecordsConfig.cc                           |   2 +
> proxy/Main.cc                                   |   7 +
> 11 files changed, 267 insertions(+), 21 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
> ----------------------------------------------------------------------
> diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
> index 694d338..ccced34 100644
> --- a/doc/reference/configuration/records.config.en.rst
> +++ b/doc/reference/configuration/records.config.en.rst
> @@ -2849,6 +2849,18 @@ Sockets
>    Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
>    holding at any one time.
> 
> +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
> +
> +   Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
> +
> +   You must also enable hugepages at the OS level. In a modern linux Kernel
> +   this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
> +   sufficiently large value. It is reasonable to use (system
> +   memory/hugepage size) because these pages are only created on demand.
> +
> +   For more information on the implications of enabling huge pages, see
> +   `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>_`.
> +
> .. ts:cv:: CONFIG proxy.config.http.enabled INT 1

I think it would be very helpful to hear about your experiences with this feature ... how about starting a thread on dev@?

> 
>    Turn on or off support for HTTP proxying. This is rarely used, the one
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index becf713..370c516 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -38,6 +38,8 @@
> #include "P_CacheBC.h"
> #endif
> 
> +#include "hugepages.h"
> +
> // Compilation Options
> #define USELESS_REENABLES // allow them for now
> // #define VERIFY_JTEST_DATA
> @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
> 
>   Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
>         (double)vol_dirlen(this) / (double)this->len * 100.0);
> -  raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> +  raw_dir = NULL;
> +  if (ats_hugepage_enabled())
> +    raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
> +  if (raw_dir == NULL)
> +    raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
>   dir = (Dir *)(raw_dir + vol_headerlen(this));
>   header = (VolHeaderFooter *)raw_dir;
>   footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
> index 3a7b9c4..e0f20d3 100644
> --- a/iocore/cache/CacheDir.cc
> +++ b/iocore/cache/CacheDir.cc
> @@ -24,6 +24,8 @@
> 
> #include "P_Cache.h"
> 
> +#include "hugepages.h"
> +
> // #define LOOP_CHECK_MODE 1
> #ifdef LOOP_CHECK_MODE
> #define DIR_LOOP_THRESHOLD 1000
> @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
>   Debug("cache_dir_sync", "sync started");
>   char *buf = NULL;
>   size_t buflen = 0;
> +  bool buf_huge = false;
> 
>   EThread *t = (EThread *)0xdeadbeef;
>   for (int i = 0; i < gnvol; i++) {
> @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
> #endif
> 
>     if (buflen < dirlen) {
> -      if (buf)
> -        ats_memalign_free(buf);
> -      buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> +      if (buf) {
> +        if (buf_huge)
> +          ats_free_hugepage(buf, buflen);
> +        else
> +          ats_memalign_free(buf);
> +      }
>       buflen = dirlen;
> +      if (ats_hugepage_enabled()) {
> +        buf = (char *)ats_alloc_hugepage(buflen);
> +        buf_huge = true;
> +      }
> +      if (buf == NULL) {
> +        buf = (char *)ats_memalign(ats_pagesize(), buflen);
> +        buf_huge = false;
> +      }
>     }
> 
>     if (!d->dir_sync_in_progress) {
> @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
>     Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
>   }
>   Debug("cache_dir_sync", "sync done");
> -  if (buf)
> -    ats_memalign_free(buf);
> +  if (buf) {
> +    if (buf_huge)
> +      ats_free_hugepage(buf, buflen);
> +    else
> +      ats_memalign_free(buf);
> +    buflen = 0;
> +    buf = NULL;
> +    buf_huge = false;
> +  }
> }
> 
> 
> @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
> Lrestart:
>   if (vol_idx >= gnvol) {
>     vol_idx = 0;
> -    if (buf) {
> -      ats_memalign_free(buf);
> -      buf = 0;
> -      buflen = 0;
> -    }
>     Debug("cache_dir_sync", "sync done");
>     if (event == EVENT_INTERVAL)
>       trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
> @@ -1196,10 +1212,21 @@ Lrestart:
>       Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
>       vol->header->dirty = 0;
>       if (buflen < dirlen) {
> -        if (buf)
> -          ats_memalign_free(buf);
> -        buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> +        if (buf) {
> +          if (buf_huge)
> +            ats_free_hugepage(buf, buflen);
> +          else
> +            ats_memalign_free(buf);
> +        }
>         buflen = dirlen;
> +        if (ats_hugepage_enabled()) {
> +          buf = (char *)ats_alloc_hugepage(buflen);
> +          buf_huge = true;
> +        }
> +        if (buf == NULL) {
> +          buf = (char *)ats_memalign(ats_pagesize(), buflen);
> +          buf_huge = false;
> +        }
>       }
>       vol->header->sync_serial++;
>       vol->footer->sync_serial = vol->header->sync_serial;
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
> index 268ecfb..881d6be 100644
> --- a/iocore/cache/P_CacheDir.h
> +++ b/iocore/cache/P_CacheDir.h
> @@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
>   int vol_idx;
>   char *buf;
>   size_t buflen;
> +  bool buf_huge;
>   off_t writepos;
>   AIOCallbackInternal io;
>   Event *trigger;
> @@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
>   int mainEvent(int event, Event *e);
>   void aio_write(int fd, char *b, int n, off_t o);
> 
> -  CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
> +  CacheSync()
> +    : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
>   {
>     SET_HANDLER(&CacheSync::mainEvent);
>   }
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
> ----------------------------------------------------------------------
> diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
> index 7e04222..18c18cf 100644
> --- a/lib/ts/Makefile.am
> +++ b/lib/ts/Makefile.am
> @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
>   defalloc.h \
>   fastlz.c \
>   fastlz.h \
> +  hugepages.cc \
> +  hugepages.h \
>   ink_aiocb.h \
>   ink_align.h \
>   ink_apidefs.h \
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
> new file mode 100644
> index 0000000..216bf0b
> --- /dev/null
> +++ b/lib/ts/hugepages.cc
> @@ -0,0 +1,144 @@
> +/** @file
> +
> +  @section license License
> +
> +  Licensed to the Apache Software Foundation (ASF) under one
> +  or more contributor license agreements.  See the NOTICE file
> +  distributed with this work for additional information
> +  regarding copyright ownership.  The ASF licenses this file
> +  to you under the Apache License, Version 2.0 (the
> +  "License"); you may not use this file except in compliance
> +  with the License.  You may obtain a copy of the License at
> +
> +      http://www.apache.org/licenses/LICENSE-2.0
> +
> +  Unless required by applicable law or agreed to in writing, software
> +  distributed under the License is distributed on an "AS IS" BASIS,
> +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +  See the License for the specific language governing permissions and
> +  limitations under the License.
> + */
> +
> +#include <cstdio>
> +#include <sys/mman.h>
> +#include "Diags.h"
> +#include "ink_align.h"
> +
> +#define DEBUG_TAG "hugepages"
> +#define MEMINFO_PATH "/proc/meminfo"
> +#define LINE_SIZE 256
> +#define TOKEN "Hugepagesize:"
> +#define TOKEN_SIZE (strlen(TOKEN))
> +
> +static int hugepage_size = -1;
> +static bool hugepage_enabled;
> +
> +size_t
> +ats_hugepage_size(void)
> +{
> +#ifdef MAP_HUGETLB
> +  return hugepage_size;
> +#else
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return 0;
> +#endif
> +}
> +
> +bool
> +ats_hugepage_enabled(void)
> +{
> +#ifdef MAP_HUGETLB
> +  return hugepage_enabled;
> +#else
> +  return false;
> +#endif
> +}
> +
> +void
> +ats_hugepage_init(int enabled)
> +{
> +#ifdef MAP_HUGETLB
> +  FILE *fp;
> +  char line[LINE_SIZE];
> +  char *p, *ep;
> +
> +  hugepage_size = 0;
> +
> +  if (!enabled) {
> +    Debug(DEBUG_TAG, "hugepages not enabled");
> +    return;
> +  }
> +
> +  fp = fopen(MEMINFO_PATH, "r");
> +
> +  if (fp == NULL) {
> +    Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
> +    return;
> +  }
> +
> +  while (fgets(line, sizeof(line), fp)) {
> +    if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
> +      p = line + TOKEN_SIZE;
> +      while (*p == ' ') {
> +        p++;
> +      }
> +      hugepage_size = strtol(p, &ep, 10);
> +      // What other values can this be?
> +      if (strncmp(ep, " kB", 4)) {
> +        hugepage_size *= 1024;
> +      }
> +      break;
> +    }
> +  }
> +
> +  fclose(fp);
> +
> +  if (hugepage_size) {
> +    hugepage_enabled = true;
> +  }
> +
> +  Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
> +#else
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +#endif
> +}
> +
> +void *
> +ats_alloc_hugepage(size_t s)
> +{
> +#ifdef MAP_HUGETLB
> +  size_t size;
> +  void *mem;
> +
> +  size = INK_ALIGN(s, ats_hugepage_size());
> +
> +  mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
> +
> +  if (mem == MAP_FAILED) {
> +    Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
> +    return NULL;
> +  }
> +
> +  return mem;
> +#else
> +  (void)s;
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return NULL;
> +#endif
> +}
> +
> +bool
> +ats_free_hugepage(void *ptr, size_t s)
> +{
> +#ifdef MAP_HUGETLB
> +  size_t size;
> +
> +  size = INK_ALIGN(s, ats_hugepage_size());
> +  return (munmap(ptr, size) == 0);
> +#else
> +  (void)ptr;
> +  (void)s;
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return false;
> +#endif
> +}
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
> new file mode 100644
> index 0000000..812542b
> --- /dev/null
> +++ b/lib/ts/hugepages.h
> @@ -0,0 +1,32 @@
> +/** @file
> +
> +  @section license License
> +
> +  Licensed to the Apache Software Foundation (ASF) under one
> +  or more contributor license agreements.  See the NOTICE file
> +  distributed with this work for additional information
> +  regarding copyright ownership.  The ASF licenses this file
> +  to you under the Apache License, Version 2.0 (the
> +  "License"); you may not use this file except in compliance
> +  with the License.  You may obtain a copy of the License at
> +
> +      http://www.apache.org/licenses/LICENSE-2.0
> +
> +  Unless required by applicable law or agreed to in writing, software
> +  distributed under the License is distributed on an "AS IS" BASIS,
> +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +  See the License for the specific language governing permissions and
> +  limitations under the License.
> + */
> +#ifndef _hugepages_h_
> +#define _hugepages_h_
> +
> +#include <cstring>

Why use the c* versions of these headers? We don't do that anywhere else and you are not actually using the names from the std namespace in this patch either ...

> +
> +size_t ats_hugepage_size(void);
> +bool ats_hugepage_enabled(void);
> +void ats_hugepage_init(int);
> +void *ats_alloc_hugepage(size_t);
> +bool ats_free_hugepage(void *, size_t);
> +
> +#endif
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
> index e718b3f..0f14b68 100644
> --- a/lib/ts/ink_queue.cc
> +++ b/lib/ts/ink_queue.cc
> @@ -50,6 +50,7 @@
> #include "ink_assert.h"
> #include "ink_queue_ext.h"
> #include "ink_align.h"
> +#include "hugepages.h"
> 
> inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
> inkcoreapi volatile int64_t fastalloc_mem_total = 0;
> @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
>   /* quick test for power of 2 */
>   ink_assert(!(alignment & (alignment - 1)));
>   f->alignment = alignment;
> -  f->chunk_size = chunk_size;
>   // Make sure we align *all* the objects in the allocation, not just the first one
>   f->type_size = INK_ALIGN(type_size, alignment);
> +  if (ats_hugepage_enabled()) {
> +    f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
> +  } else {
> +    f->chunk_size = chunk_size;
> +  }
>   SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
> 
>   f->used = 0;
> @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
> #ifdef DEBUG
>       char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
> #endif
> -      if (f->alignment)
> -        newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> -      else
> -        newp = ats_malloc(f->chunk_size * type_size);
> +      if (ats_hugepage_enabled())
> +        newp = ats_alloc_hugepage(f->chunk_size * type_size);
> +
> +      if (newp == NULL) {
> +        if (f->alignment)
> +          newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> +        else
> +          newp = ats_malloc(f->chunk_size * type_size);
> +      }
>       ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
> -
>       fl_memadd(f->chunk_size * type_size);
> #ifdef DEBUG
>       newsbrk = (char *)sbrk(0);
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index f136d74..a99e67f 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -41,6 +41,7 @@
> #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
> */
> 
> +#include "hugepages.h"
> #include "ink_config.h"
> #include "ink_platform.h"
> #include "ink_align.h"
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
> ----------------------------------------------------------------------
> diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
> index ebcb8fd..ec3387a 100644
> --- a/mgmt/RecordsConfig.cc
> +++ b/mgmt/RecordsConfig.cc
> @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
>   ,
>   {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
>   ,
> +  {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
> +  ,

I would expect bad things to happen if you change this setting at runtime, so the RECU value should be RECU_RESTART_TS.

> 
>   //############
>   //#
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
> ----------------------------------------------------------------------
> diff --git a/proxy/Main.cc b/proxy/Main.cc
> index 202da33..4684945 100644
> --- a/proxy/Main.cc
> +++ b/proxy/Main.cc
> @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
>   // Restart syslog now that we have configuration info
>   syslog_log_configure();
> 
> +  // init huge pages
> +  int enabled;
> +  REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
> +  ats_hugepage_init(enabled);
> +  Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
> +  Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());

Could you imagine there being a need to set this separately for the RAM cache and for the allocators?

J


Re: trafficserver git commit: TS-3122: Add support for hugepages on Linux

Posted by James Peach <jp...@apache.org>.
> On Jun 8, 2015, at 8:40 AM, sorber@apache.org wrote:
> 
> Repository: trafficserver
> Updated Branches:
>  refs/heads/master 1a0832b3b -> bba557870
> 
> 
> TS-3122: Add support for hugepages on Linux
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/bba55787
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/bba55787
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/bba55787
> 
> Branch: refs/heads/master
> Commit: bba557870c05222d302a05ec948871cdde8bf63b
> Parents: 1a0832b
> Author: Phil Sorber <so...@apache.org>
> Authored: Thu Oct 16 19:58:08 2014 -0600
> Committer: Phil Sorber <so...@apache.org>
> Committed: Mon Jun 8 09:28:56 2015 -0600
> 
> ----------------------------------------------------------------------
> .../configuration/records.config.en.rst         |  12 ++
> iocore/cache/Cache.cc                           |  10 +-
> iocore/cache/CacheDir.cc                        |  53 +++++--
> iocore/cache/P_CacheDir.h                       |   4 +-
> lib/ts/Makefile.am                              |   2 +
> lib/ts/hugepages.cc                             | 144 +++++++++++++++++++
> lib/ts/hugepages.h                              |  32 +++++
> lib/ts/ink_queue.cc                             |  21 ++-
> lib/ts/libts.h                                  |   1 +
> mgmt/RecordsConfig.cc                           |   2 +
> proxy/Main.cc                                   |   7 +
> 11 files changed, 267 insertions(+), 21 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/doc/reference/configuration/records.config.en.rst
> ----------------------------------------------------------------------
> diff --git a/doc/reference/configuration/records.config.en.rst b/doc/reference/configuration/records.config.en.rst
> index 694d338..ccced34 100644
> --- a/doc/reference/configuration/records.config.en.rst
> +++ b/doc/reference/configuration/records.config.en.rst
> @@ -2849,6 +2849,18 @@ Sockets
>    Sets the minimum number of items a ProxyAllocator (per-thread) will guarantee to be
>    holding at any one time.
> 
> +.. ts:cv:: CONFIG proxy.config.allocator.hugepages INT 0
> +
> +   Enable (1) the use of huge pages on supported platforms. (Currently only Linux)
> +
> +   You must also enable hugepages at the OS level. In a modern linux Kernel
> +   this can be done by setting ``/proc/sys/vm/nr_overcommit_hugepages`` to a
> +   sufficiently large value. It is reasonable to use (system
> +   memory/hugepage size) because these pages are only created on demand.
> +
> +   For more information on the implications of enabling huge pages, see
> +   `Wikipedia <http://en.wikipedia.org/wiki/Page_%28computer_memory%29#Page_size_trade-off>_`.
> +
> .. ts:cv:: CONFIG proxy.config.http.enabled INT 1

I think it would be very helpful to hear about your experiences with this feature ... how about starting a thread on dev@?

> 
>    Turn on or off support for HTTP proxying. This is rarely used, the one
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index becf713..370c516 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -38,6 +38,8 @@
> #include "P_CacheBC.h"
> #endif
> 
> +#include "hugepages.h"
> +
> // Compilation Options
> #define USELESS_REENABLES // allow them for now
> // #define VERIFY_JTEST_DATA
> @@ -1495,7 +1497,13 @@ Vol::init(char *s, off_t blocks, off_t dir_skip, bool clear)
> 
>   Debug("cache_init", "allocating %zu directory bytes for a %lld byte volume (%lf%%)", vol_dirlen(this), (long long)this->len,
>         (double)vol_dirlen(this) / (double)this->len * 100.0);
> -  raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
> +  raw_dir = NULL;
> +  if (ats_hugepage_enabled())
> +    raw_dir = (char *)ats_alloc_hugepage(vol_dirlen(this));
> +  if (raw_dir == NULL)
> +    raw_dir = (char *)ats_memalign(ats_pagesize(), vol_dirlen(this));
> +
>   dir = (Dir *)(raw_dir + vol_headerlen(this));
>   header = (VolHeaderFooter *)raw_dir;
>   footer = (VolHeaderFooter *)(raw_dir + vol_dirlen(this) - ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter)));
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/CacheDir.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheDir.cc b/iocore/cache/CacheDir.cc
> index 3a7b9c4..e0f20d3 100644
> --- a/iocore/cache/CacheDir.cc
> +++ b/iocore/cache/CacheDir.cc
> @@ -24,6 +24,8 @@
> 
> #include "P_Cache.h"
> 
> +#include "hugepages.h"
> +
> // #define LOOP_CHECK_MODE 1
> #ifdef LOOP_CHECK_MODE
> #define DIR_LOOP_THRESHOLD 1000
> @@ -1011,6 +1013,7 @@ sync_cache_dir_on_shutdown(void)
>   Debug("cache_dir_sync", "sync started");
>   char *buf = NULL;
>   size_t buflen = 0;
> +  bool buf_huge = false;
> 
>   EThread *t = (EThread *)0xdeadbeef;
>   for (int i = 0; i < gnvol; i++) {
> @@ -1077,10 +1080,21 @@ sync_cache_dir_on_shutdown(void)
> #endif
> 
>     if (buflen < dirlen) {
> -      if (buf)
> -        ats_memalign_free(buf);
> -      buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> +      if (buf) {
> +        if (buf_huge)
> +          ats_free_hugepage(buf, buflen);
> +        else
> +          ats_memalign_free(buf);
> +      }
>       buflen = dirlen;
> +      if (ats_hugepage_enabled()) {
> +        buf = (char *)ats_alloc_hugepage(buflen);
> +        buf_huge = true;
> +      }
> +      if (buf == NULL) {
> +        buf = (char *)ats_memalign(ats_pagesize(), buflen);
> +        buf_huge = false;
> +      }
>     }
> 
>     if (!d->dir_sync_in_progress) {
> @@ -1104,8 +1118,15 @@ sync_cache_dir_on_shutdown(void)
>     Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
>   }
>   Debug("cache_dir_sync", "sync done");
> -  if (buf)
> -    ats_memalign_free(buf);
> +  if (buf) {
> +    if (buf_huge)
> +      ats_free_hugepage(buf, buflen);
> +    else
> +      ats_memalign_free(buf);
> +    buflen = 0;
> +    buf = NULL;
> +    buf_huge = false;
> +  }
> }
> 
> 
> @@ -1120,11 +1141,6 @@ CacheSync::mainEvent(int event, Event *e)
> Lrestart:
>   if (vol_idx >= gnvol) {
>     vol_idx = 0;
> -    if (buf) {
> -      ats_memalign_free(buf);
> -      buf = 0;
> -      buflen = 0;
> -    }
>     Debug("cache_dir_sync", "sync done");
>     if (event == EVENT_INTERVAL)
>       trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
> @@ -1196,10 +1212,21 @@ Lrestart:
>       Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
>       vol->header->dirty = 0;
>       if (buflen < dirlen) {
> -        if (buf)
> -          ats_memalign_free(buf);
> -        buf = (char *)ats_memalign(ats_pagesize(), dirlen);
> +        if (buf) {
> +          if (buf_huge)
> +            ats_free_hugepage(buf, buflen);
> +          else
> +            ats_memalign_free(buf);
> +        }
>         buflen = dirlen;
> +        if (ats_hugepage_enabled()) {
> +          buf = (char *)ats_alloc_hugepage(buflen);
> +          buf_huge = true;
> +        }
> +        if (buf == NULL) {
> +          buf = (char *)ats_memalign(ats_pagesize(), buflen);
> +          buf_huge = false;
> +        }
>       }
>       vol->header->sync_serial++;
>       vol->footer->sync_serial = vol->header->sync_serial;
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/iocore/cache/P_CacheDir.h
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/P_CacheDir.h b/iocore/cache/P_CacheDir.h
> index 268ecfb..881d6be 100644
> --- a/iocore/cache/P_CacheDir.h
> +++ b/iocore/cache/P_CacheDir.h
> @@ -295,6 +295,7 @@ struct CacheSync : public Continuation {
>   int vol_idx;
>   char *buf;
>   size_t buflen;
> +  bool buf_huge;
>   off_t writepos;
>   AIOCallbackInternal io;
>   Event *trigger;
> @@ -302,7 +303,8 @@ struct CacheSync : public Continuation {
>   int mainEvent(int event, Event *e);
>   void aio_write(int fd, char *b, int n, off_t o);
> 
> -  CacheSync() : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), writepos(0), trigger(0), start_time(0)
> +  CacheSync()
> +    : Continuation(new_ProxyMutex()), vol_idx(0), buf(0), buflen(0), buf_huge(false), writepos(0), trigger(0), start_time(0)
>   {
>     SET_HANDLER(&CacheSync::mainEvent);
>   }
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/Makefile.am
> ----------------------------------------------------------------------
> diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
> index 7e04222..18c18cf 100644
> --- a/lib/ts/Makefile.am
> +++ b/lib/ts/Makefile.am
> @@ -107,6 +107,8 @@ libtsutil_la_SOURCES = \
>   defalloc.h \
>   fastlz.c \
>   fastlz.h \
> +  hugepages.cc \
> +  hugepages.h \
>   ink_aiocb.h \
>   ink_align.h \
>   ink_apidefs.h \
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.cc b/lib/ts/hugepages.cc
> new file mode 100644
> index 0000000..216bf0b
> --- /dev/null
> +++ b/lib/ts/hugepages.cc
> @@ -0,0 +1,144 @@
> +/** @file
> +
> +  @section license License
> +
> +  Licensed to the Apache Software Foundation (ASF) under one
> +  or more contributor license agreements.  See the NOTICE file
> +  distributed with this work for additional information
> +  regarding copyright ownership.  The ASF licenses this file
> +  to you under the Apache License, Version 2.0 (the
> +  "License"); you may not use this file except in compliance
> +  with the License.  You may obtain a copy of the License at
> +
> +      http://www.apache.org/licenses/LICENSE-2.0
> +
> +  Unless required by applicable law or agreed to in writing, software
> +  distributed under the License is distributed on an "AS IS" BASIS,
> +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +  See the License for the specific language governing permissions and
> +  limitations under the License.
> + */
> +
> +#include <cstdio>
> +#include <sys/mman.h>
> +#include "Diags.h"
> +#include "ink_align.h"
> +
> +#define DEBUG_TAG "hugepages"
> +#define MEMINFO_PATH "/proc/meminfo"
> +#define LINE_SIZE 256
> +#define TOKEN "Hugepagesize:"
> +#define TOKEN_SIZE (strlen(TOKEN))
> +
> +static int hugepage_size = -1;
> +static bool hugepage_enabled;
> +
> +size_t
> +ats_hugepage_size(void)
> +{
> +#ifdef MAP_HUGETLB
> +  return hugepage_size;
> +#else
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return 0;
> +#endif
> +}
> +
> +bool
> +ats_hugepage_enabled(void)
> +{
> +#ifdef MAP_HUGETLB
> +  return hugepage_enabled;
> +#else
> +  return false;
> +#endif
> +}
> +
> +void
> +ats_hugepage_init(int enabled)
> +{
> +#ifdef MAP_HUGETLB
> +  FILE *fp;
> +  char line[LINE_SIZE];
> +  char *p, *ep;
> +
> +  hugepage_size = 0;
> +
> +  if (!enabled) {
> +    Debug(DEBUG_TAG, "hugepages not enabled");
> +    return;
> +  }
> +
> +  fp = fopen(MEMINFO_PATH, "r");
> +
> +  if (fp == NULL) {
> +    Debug(DEBUG_TAG, "Cannot open file %s", MEMINFO_PATH);
> +    return;
> +  }
> +
> +  while (fgets(line, sizeof(line), fp)) {
> +    if (strncmp(line, TOKEN, TOKEN_SIZE) == 0) {
> +      p = line + TOKEN_SIZE;
> +      while (*p == ' ') {
> +        p++;
> +      }
> +      hugepage_size = strtol(p, &ep, 10);
> +      // What other values can this be?
> +      if (strncmp(ep, " kB", 4)) {
> +        hugepage_size *= 1024;
> +      }
> +      break;
> +    }
> +  }
> +
> +  fclose(fp);
> +
> +  if (hugepage_size) {
> +    hugepage_enabled = true;
> +  }
> +
> +  Debug(DEBUG_TAG, "Hugepage size = %d", hugepage_size);
> +#else
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +#endif
> +}
> +
> +void *
> +ats_alloc_hugepage(size_t s)
> +{
> +#ifdef MAP_HUGETLB
> +  size_t size;
> +  void *mem;
> +
> +  size = INK_ALIGN(s, ats_hugepage_size());
> +
> +  mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
> +
> +  if (mem == MAP_FAILED) {
> +    Debug(DEBUG_TAG, "Could not allocate hugepages size = %zu", size);
> +    return NULL;
> +  }
> +
> +  return mem;
> +#else
> +  (void)s;
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return NULL;
> +#endif
> +}
> +
> +bool
> +ats_free_hugepage(void *ptr, size_t s)
> +{
> +#ifdef MAP_HUGETLB
> +  size_t size;
> +
> +  size = INK_ALIGN(s, ats_hugepage_size());
> +  return (munmap(ptr, size) == 0);
> +#else
> +  (void)ptr;
> +  (void)s;
> +  Debug(DEBUG_TAG, "MAP_HUGETLB not defined");
> +  return false;
> +#endif
> +}
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/hugepages.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/hugepages.h b/lib/ts/hugepages.h
> new file mode 100644
> index 0000000..812542b
> --- /dev/null
> +++ b/lib/ts/hugepages.h
> @@ -0,0 +1,32 @@
> +/** @file
> +
> +  @section license License
> +
> +  Licensed to the Apache Software Foundation (ASF) under one
> +  or more contributor license agreements.  See the NOTICE file
> +  distributed with this work for additional information
> +  regarding copyright ownership.  The ASF licenses this file
> +  to you under the Apache License, Version 2.0 (the
> +  "License"); you may not use this file except in compliance
> +  with the License.  You may obtain a copy of the License at
> +
> +      http://www.apache.org/licenses/LICENSE-2.0
> +
> +  Unless required by applicable law or agreed to in writing, software
> +  distributed under the License is distributed on an "AS IS" BASIS,
> +  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +  See the License for the specific language governing permissions and
> +  limitations under the License.
> + */
> +#ifndef _hugepages_h_
> +#define _hugepages_h_
> +
> +#include <cstring>

Why use the c* versions of these headers? We don't do that anywhere else and you are not actually using the names from the std namespace in this patch either ...

> +
> +size_t ats_hugepage_size(void);
> +bool ats_hugepage_enabled(void);
> +void ats_hugepage_init(int);
> +void *ats_alloc_hugepage(size_t);
> +bool ats_free_hugepage(void *, size_t);
> +
> +#endif
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/ink_queue.cc
> ----------------------------------------------------------------------
> diff --git a/lib/ts/ink_queue.cc b/lib/ts/ink_queue.cc
> index e718b3f..0f14b68 100644
> --- a/lib/ts/ink_queue.cc
> +++ b/lib/ts/ink_queue.cc
> @@ -50,6 +50,7 @@
> #include "ink_assert.h"
> #include "ink_queue_ext.h"
> #include "ink_align.h"
> +#include "hugepages.h"
> 
> inkcoreapi volatile int64_t fastalloc_mem_in_use = 0;
> inkcoreapi volatile int64_t fastalloc_mem_total = 0;
> @@ -100,9 +101,13 @@ ink_freelist_init(InkFreeList **fl, const char *name, uint32_t type_size, uint32
>   /* quick test for power of 2 */
>   ink_assert(!(alignment & (alignment - 1)));
>   f->alignment = alignment;
> -  f->chunk_size = chunk_size;
>   // Make sure we align *all* the objects in the allocation, not just the first one
>   f->type_size = INK_ALIGN(type_size, alignment);
> +  if (ats_hugepage_enabled()) {
> +    f->chunk_size = INK_ALIGN(chunk_size * f->type_size, ats_hugepage_size()) / f->type_size;
> +  } else {
> +    f->chunk_size = chunk_size;
> +  }
>   SET_FREELIST_POINTER_VERSION(f->head, FROM_PTR(0), 0);
> 
>   f->used = 0;
> @@ -171,12 +176,16 @@ ink_freelist_new(InkFreeList *f)
> #ifdef DEBUG
>       char *oldsbrk = (char *)sbrk(0), *newsbrk = NULL;
> #endif
> -      if (f->alignment)
> -        newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> -      else
> -        newp = ats_malloc(f->chunk_size * type_size);
> +      if (ats_hugepage_enabled())
> +        newp = ats_alloc_hugepage(f->chunk_size * type_size);
> +
> +      if (newp == NULL) {
> +        if (f->alignment)
> +          newp = ats_memalign(f->alignment, f->chunk_size * type_size);
> +        else
> +          newp = ats_malloc(f->chunk_size * type_size);
> +      }
>       ats_madvise((caddr_t)newp, f->chunk_size * type_size, f->advice);
> -
>       fl_memadd(f->chunk_size * type_size);
> #ifdef DEBUG
>       newsbrk = (char *)sbrk(0);
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index f136d74..a99e67f 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -41,6 +41,7 @@
> #define std *** _FIXME_REMOVE_DEPENDENCY_ON_THE_STL_ ***
> */
> 
> +#include "hugepages.h"
> #include "ink_config.h"
> #include "ink_platform.h"
> #include "ink_align.h"
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/mgmt/RecordsConfig.cc
> ----------------------------------------------------------------------
> diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
> index ebcb8fd..ec3387a 100644
> --- a/mgmt/RecordsConfig.cc
> +++ b/mgmt/RecordsConfig.cc
> @@ -2079,6 +2079,8 @@ static const RecordElement RecordsConfig[] =
>   ,
>   {RECT_CONFIG, "proxy.config.allocator.debug_filter", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-3]", RECA_NULL}
>   ,
> +  {RECT_CONFIG, "proxy.config.allocator.hugepages", RECD_INT, "0", RECU_NULL, RR_NULL, RECC_NULL, "[0-1]", RECA_NULL}
> +  ,

I would expect bad things to happen if you change this setting at runtime, so the RECU value should be RECU_RESTART_TS.

> 
>   //############
>   //#
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/bba55787/proxy/Main.cc
> ----------------------------------------------------------------------
> diff --git a/proxy/Main.cc b/proxy/Main.cc
> index 202da33..4684945 100644
> --- a/proxy/Main.cc
> +++ b/proxy/Main.cc
> @@ -1458,6 +1458,13 @@ main(int /* argc ATS_UNUSED */, const char **argv)
>   // Restart syslog now that we have configuration info
>   syslog_log_configure();
> 
> +  // init huge pages
> +  int enabled;
> +  REC_ReadConfigInteger(enabled, "proxy.config.allocator.hugepages");
> +  ats_hugepage_init(enabled);
> +  Debug("hugepages", "ats_pagesize reporting %zu", ats_pagesize());
> +  Debug("hugepages", "ats_hugepage_size reporting %zu", ats_hugepage_size());

Could you imagine there being a need to set this separately for the RAM cache and for the allocators?

J