You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@trafficserver.apache.org by James Peach <jp...@apache.org> on 2013/04/23 23:07:39 UTC

Re: git commit: TS-1817: use the libaio interface to the Linux AIO system call

Weijin,

Can you please review and make sure I didn't break anything? This still needs the change from TS-1821 ...

J


On Apr 23, 2013, at 2:03 PM, jpeach@apache.org wrote:

> Updated Branches:
>  refs/heads/master af7057e32 -> cade10bd7
> 
> 
> TS-1817: use the libaio interface to the Linux AIO system call
> 
> 
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/cade10bd
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/cade10bd
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/cade10bd
> 
> Branch: refs/heads/master
> Commit: cade10bd7268d12806f97da59ead52c6901fe8bd
> Parents: af7057e
> Author: James Peach <jp...@apache.org>
> Authored: Tue Apr 16 09:57:53 2013 -0700
> Committer: James Peach <jp...@apache.org>
> Committed: Tue Apr 23 13:59:21 2013 -0700
> 
> ----------------------------------------------------------------------
> CHANGES                     |    2 +
> configure.ac                |   21 +++++--
> contrib/manifests/debian.pp |    2 +-
> contrib/manifests/redhat.pp |    2 +-
> iocore/aio/AIO.cc           |   16 +++---
> iocore/aio/I_AIO.h          |  111 +++++++++++---------------------------
> iocore/aio/P_AIO.h          |    6 ++-
> iocore/cache/Cache.cc       |    4 +-
> iocore/cache/CacheWrite.cc  |    7 ++-
> lib/ts/libts.h              |    1 -
> 10 files changed, 68 insertions(+), 104 deletions(-)
> ----------------------------------------------------------------------
> 
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/CHANGES
> ----------------------------------------------------------------------
> diff --git a/CHANGES b/CHANGES
> index 5820c6a..8f89319 100644
> --- a/CHANGES
> +++ b/CHANGES
> @@ -2,6 +2,8 @@
>   Changes with Apache Traffic Server 3.3.3
> 
> 
> +  *) [TS-1817] Use the libaio interface to the Linux AIO system calls.
> +
>   *) [TS-1721] Integrate tstop into the autotools build.
> 
>   *) [TS-1706] Fix documentation for Config::Records.pm.
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/configure.ac
> ----------------------------------------------------------------------
> diff --git a/configure.ac b/configure.ac
> index 591c1b5..0c619e6 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -423,12 +423,21 @@ AC_ARG_ENABLE([linux-native-aio],
>   [enable_linux_native_aio=no]
> )
> 
> -case $host_os in
> -  linux*);;
> -  *) AS_IF([test "x$enable_linux_native_aio" = "xyes"], [
> -      AC_MSG_ERROR([Linux native AIO can only be enabled on Linux systems])
> -    ])
> -esac
> +AS_IF([test "x$enable_linux_native_aio" = "xyes"], [
> +  case $host_os in
> +    linux*) ;;
> +    *) AC_MSG_ERROR([Linux native AIO can only be enabled on Linux systems])
> +  esac
> +
> +  AC_CHECK_HEADERS([libaio.h], [],
> +    [AC_MSG_ERROR([Linux native AIO requires libaio.h])]
> +  )
> +
> +  AC_SEARCH_LIBS([io_submit], [aio], [],
> +    [AC_MSG_ERROR([Linux native AIO requires libaio])]
> +  )
> +
> +])
> 
> AC_MSG_RESULT([$enable_linux_native_aio])
> TS_ARG_ENABLE_VAR([use], [linux_native_aio])
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/contrib/manifests/debian.pp
> ----------------------------------------------------------------------
> diff --git a/contrib/manifests/debian.pp b/contrib/manifests/debian.pp
> index 38b78ff..e22c26c 100644
> --- a/contrib/manifests/debian.pp
> +++ b/contrib/manifests/debian.pp
> @@ -18,7 +18,7 @@
> package {[
>     'gcc', 'g++', 'automake', 'autoconf', 'libtool', 'pkg-config',
>     'libssl-dev', 'tcl-dev', 'libexpat1-dev', 'libpcre3-dev', 'libhwloc-dev',
> -    'libcurl3-dev', 'libncurses5-dev',
> +    'libcurl3-dev', 'libncurses5-dev', 'libaio-dev',
>     'libcap-dev', 'libcap2', 'bison', 'flex', 'make',
>   ]:
>   ensure => latest
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/contrib/manifests/redhat.pp
> ----------------------------------------------------------------------
> diff --git a/contrib/manifests/redhat.pp b/contrib/manifests/redhat.pp
> index 0fae61a..6bb1f28 100644
> --- a/contrib/manifests/redhat.pp
> +++ b/contrib/manifests/redhat.pp
> @@ -18,7 +18,7 @@
> package {[
>     'gcc', 'automake', 'autoconf', 'libtool', 'pkgconfig',
>     'openssl-devel', 'tcl-devel', 'expat-devel', 'pcre-devel',
> -    'ncurses-devel', 'libcurl-devel',
> +    'ncurses-devel', 'libcurl-devel', 'libaio-devel',
>     'hwloc-devel', 'libcap-devel', 'bison', 'flex', 'make',
>   ]:
>   ensure => latest
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/AIO.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc
> index 40bf1b1..769ade6 100644
> --- a/iocore/aio/AIO.cc
> +++ b/iocore/aio/AIO.cc
> @@ -599,8 +599,8 @@ Lagain:
> int
> ink_aio_read(AIOCallback *op, int fromAPI) {
>   op->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> -  op->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
> -  op->aiocb.aio_data = op;
> +  op->aiocb.aio_lio_opcode = IO_CMD_PREAD;
> +  op->aiocb.data = op;
>   this_ethread()->diskHandler->ready_list.enqueue(op);
> 
>   return 1;
> @@ -609,8 +609,8 @@ ink_aio_read(AIOCallback *op, int fromAPI) {
> int
> ink_aio_write(AIOCallback *op, int fromAPI) {
>   op->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> -  op->aiocb.aio_lio_opcode = IOCB_CMD_PWRITE;
> -  op->aiocb.aio_data = op;
> +  op->aiocb.aio_lio_opcode = IO_CMD_PWRITE;
> +  op->aiocb.data = op;
>   this_ethread()->diskHandler->ready_list.enqueue(op);
> 
>   return 1;
> @@ -624,8 +624,8 @@ ink_aio_readv(AIOCallback *op, int fromAPI) {
> 
>   while (io) {
>     io->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> -    io->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
> -    io->aiocb.aio_data = io;
> +    io->aiocb.aio_lio_opcode = IO_CMD_PREAD;
> +    io->aiocb.data = io;
>     dh->ready_list.enqueue(io);
>     ++sz;
>     io = io->then;
> @@ -651,8 +651,8 @@ ink_aio_writev(AIOCallback *op, int fromAPI) {
> 
>   while (io) {
>     io->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> -    io->aiocb.aio_lio_opcode = IOCB_CMD_PWRITE;
> -    io->aiocb.aio_data = io;
> +    io->aiocb.aio_lio_opcode = IO_CMD_PWRITE;
> +    io->aiocb.data = io;
>     dh->ready_list.enqueue(io);
>     ++sz;
>     io = io->then;
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/I_AIO.h
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/I_AIO.h b/iocore/aio/I_AIO.h
> index ef49d93..00af2b3 100644
> --- a/iocore/aio/I_AIO.h
> +++ b/iocore/aio/I_AIO.h
> @@ -54,88 +54,22 @@
> #define AIO_MODE                 AIO_MODE_THREAD
> #endif
> 
> -#if AIO_MODE == AIO_MODE_NATIVE
> -
> -#include <sys/syscall.h>  /* for __NR_* definitions */
> -#include <linux/aio_abi.h>  /* for AIO types and constants */
> -#define MAX_AIO_EVENTS 1024
> -
> -#if defined(__LITTLE_ENDIAN)
> -#if (SIZEOF_VOID_POINTER == 4)
> -#define PADDEDPtr(x, y) x; unsigned y
> -#define PADDEDul(x, y) unsigned long x; unsigned y
> -#elif (SIZEOF_VOID_POINTER == 8)
> -#define PADDEDPtr(x, y) x
> -#define PADDEDul(x, y) unsigned long x
> -#endif
> -#elif defined(__BIG_ENDIAN)
> -#if (SIZEOF_VOID_POINTER == 4)
> -#define PADDEDPtr(x, y) unsigned y; x
> -#define PADDEDul(x, y) unsigned y; unsigned long y
> -#elif (SIZEOF_VOID_POINTER == 8)
> -#define PADDEDPtr(x, y) x
> -#define PADDEDul(x, y) unsigned long x
> -#endif
> -#else
> -#error edit for your odd byteorder.
> -#endif
> -
> -typedef struct ink_iocb {
> -  /* these are internal to the kernel/libc. */
> -  PADDEDPtr(void *aio_data, _pad1); /* data to be returned in event's data */
> -  unsigned PADDED(aio_key, aio_reserved1);
> -        /* the kernel sets aio_key to the req # */
> -
> -  /* common fields */
> -  short aio_lio_opcode; /* see IOCB_CMD_ above */
> -  short aio_reqprio;
> -  int aio_fildes;
> -
> -  PADDEDPtr(void *aio_buf, _pad2);
> -  PADDEDul(aio_nbytes, _pad3);
> -  int64_t aio_offset;
> -
> -  /* extra parameters */
> -  uint64_t aio_reserved2;  /* TODO: use this for a (struct sigevent *) */
> -
> -  /* flags for the "struct iocb" */
> -  int aio_flags;
> -
> -  /*
> -   * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an
> -   * eventfd to signal AIO readiness to
> -   */
> -  int aio_resfd;
> -
> -} ink_aiocb_t;
> +#define LIO_READ        0x1
> +#define LIO_WRITE       0x2
> 
> -typedef struct ink_io_event {
> -  PADDEDPtr(void *data, _pad1);   /* the data field from the iocb */
> -  PADDEDPtr(ink_aiocb_t *obj, _pad2);    /* what iocb this event came from */
> -  PADDEDul(res, _pad3);    /* result code for this event */
> -  PADDEDul(res2, _pad4);   /* secondary result */
> -} ink_io_event_t;
> +#if AIO_MODE == AIO_MODE_NATIVE
> 
> -TS_INLINE int io_setup(unsigned nr, aio_context_t *ctxp)
> -{
> -  return syscall(__NR_io_setup, nr, ctxp);
> -}
> +#include <libaio.h>
> 
> -TS_INLINE int io_destroy(aio_context_t ctx)
> -{
> -  return syscall(__NR_io_destroy, ctx);
> -}
> +#define MAX_AIO_EVENTS 1024
> 
> -TS_INLINE int io_submit(aio_context_t ctx, long nr,  ink_aiocb_t **iocbpp)
> -{
> -  return syscall(__NR_io_submit, ctx, nr, iocbpp);
> -}
> +typedef struct iocb ink_aiocb_t;
> +typedef struct io_event ink_io_event_t;
> 
> -TS_INLINE int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
> -    ink_io_event_t *events, struct timespec *timeout)
> -{
> -  return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
> -}
> +// XXX hokey old-school compatibility with ink_aiocb.h ...
> +#define aio_nbytes  u.c.nbytes
> +#define aio_offset  u.c.offset
> +#define aio_buf     u.c.buf
> 
> struct AIOVec: public Continuation
> {
> @@ -151,10 +85,26 @@ struct AIOVec: public Continuation
> 
>   int mainEvent(int event, Event *e);
> };
> +
> #else
> -typedef ink_aiocb ink_aiocb_t;
> +
> +typedef struct ink_aiocb
> +{
> +  int aio_fildes;
> +  volatile void *aio_buf;       /* buffer location */
> +  size_t aio_nbytes;            /* length of transfer */
> +  off_t aio_offset;             /* file offset */
> +
> +  int aio_reqprio;              /* request priority offset */
> +  int aio_lio_opcode;           /* listio operation */
> +  int aio_state;                /* state flag for List I/O */
> +  int aio__pad[1];              /* extension padding */
> +} ink_aiocb_t;
> +
> bool ink_aio_thread_num_set(int thread_num);
> +
> #endif
> +
> // AIOCallback::thread special values
> #define AIO_CALLBACK_THREAD_ANY ((EThread*)0) // any regular event thread
> #define AIO_CALLBACK_THREAD_AIO ((EThread*)-1)
> @@ -182,7 +132,7 @@ struct AIOCallback: public Continuation
> struct DiskHandler: public Continuation
> {
>   Event *trigger_event;
> -  aio_context_t ctx;
> +  io_context_t ctx;
>   ink_io_event_t events[MAX_AIO_EVENTS];
>   Que(AIOCallback, link) ready_list;
>   Que(AIOCallback, link) complete_list;
> @@ -190,7 +140,7 @@ struct DiskHandler: public Continuation
>   int mainAIOEvent(int event, Event *e);
>   DiskHandler() {
>     SET_HANDLER(&DiskHandler::startAIOEvent);
> -    memset(&ctx, 0, sizeof(aio_context_t));
> +    memset(&ctx, 0, sizeof(ctx));
>     int ret = io_setup(MAX_AIO_EVENTS, &ctx);
>     if (ret < 0) {
>       perror("io_setup error");
> @@ -198,6 +148,7 @@ struct DiskHandler: public Continuation
>   }
> };
> #endif
> +
> void ink_aio_init(ModuleVersion version);
> int ink_aio_start();
> void ink_aio_set_callback(Continuation * error_callback);
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/P_AIO.h
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/P_AIO.h b/iocore/aio/P_AIO.h
> index 9e3321c..fe91132 100644
> --- a/iocore/aio/P_AIO.h
> +++ b/iocore/aio/P_AIO.h
> @@ -78,7 +78,7 @@ AIOCallbackInternal::io_complete(int event, void *data)
> }
> 
> TS_INLINE int
> -AIOVec::mainEvent(int event, Event *e) {
> +AIOVec::mainEvent(int /* event */, Event *) {
>   ++completed;
>   if (completed < size)
>     return EVENT_CONT;
> @@ -92,7 +92,9 @@ AIOVec::mainEvent(int event, Event *e) {
>   ink_assert(!"AIOVec mainEvent err");
>   return EVENT_ERROR;
> }
> -#else
> +
> +#else /* AIO_MODE != AIO_MODE_NATIVE */
> +
> struct AIO_Reqs;
> 
> struct AIOCallbackInternal: public AIOCallback
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index 4173f3d..3971cb1 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -1964,7 +1964,7 @@ CacheVC::handleReadDone(int event, Event *e) {
>     if ((!dir_valid(vol, &dir)) || (!io.ok())) {
>       if (!io.ok()) {
>         Debug("cache_disk_error", "Read error on disk %s\n \
> -	    read range : [%" PRIu64 " - %" PRIu64 " bytes]  [%" PRIu64 " - %" PRIu64 " blocks] \n", vol->hash_id, io.aiocb.aio_offset, io.aiocb.aio_offset + io.aiocb.aio_nbytes, io.aiocb.aio_offset / 512, (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / 512);
> +	    read range : [%" PRIu64 " - %" PRIu64 " bytes]  [%" PRIu64 " - %" PRIu64 " blocks] \n", vol->hash_id, (uint64_t)io.aiocb.aio_offset, (uint64_t)io.aiocb.aio_offset + io.aiocb.aio_nbytes, (uint64_t)io.aiocb.aio_offset / 512, (uint64_t)(io.aiocb.aio_offset + io.aiocb.aio_nbytes) / 512);
>       }
>       goto Ldone;
>     }
> @@ -2007,7 +2007,7 @@ CacheVC::handleReadDone(int event, Event *e) {
>         if (checksum != doc->checksum) {
>           Note("cache: checksum error for [%" PRIu64 " %" PRIu64 "] len %d, hlen %d, disk %s, offset %" PRIu64 " size %zu",
>                doc->first_key.b[0], doc->first_key.b[1],
> -               doc->len, doc->hlen, vol->path, io.aiocb.aio_offset, (size_t)io.aiocb.aio_nbytes);
> +               doc->len, doc->hlen, vol->path, (uint64_t)io.aiocb.aio_offset, (size_t)io.aiocb.aio_nbytes);
>           doc->magic = DOC_CORRUPT;
>           okay = 0;
>         }
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/cache/CacheWrite.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheWrite.cc b/iocore/cache/CacheWrite.cc
> index dee62bd..ae87948 100644
> --- a/iocore/cache/CacheWrite.cc
> +++ b/iocore/cache/CacheWrite.cc
> @@ -330,9 +330,10 @@ Vol::aggWriteDone(int event, Event *e)
>     // for fragments is this aggregation buffer
>     Debug("cache_disk_error", "Write error on disk %s\n \
>               write range : [%" PRIu64 " - %" PRIu64 " bytes]  [%" PRIu64 " - %" PRIu64 " blocks] \n",
> -          hash_id, io.aiocb.aio_offset, io.aiocb.aio_offset + io.aiocb.aio_nbytes,
> -          io.aiocb.aio_offset / CACHE_BLOCK_SIZE,
> -          (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / CACHE_BLOCK_SIZE);
> +          hash_id, (uint64_t)io.aiocb.aio_offset,
> +          (uint64_t)io.aiocb.aio_offset + io.aiocb.aio_nbytes,
> +          (uint64_t)io.aiocb.aio_offset / CACHE_BLOCK_SIZE,
> +          (uint64_t)(io.aiocb.aio_offset + io.aiocb.aio_nbytes) / CACHE_BLOCK_SIZE);
>     Dir del_dir;
>     dir_clear(&del_dir);
>     for (int done = 0; done < agg_buf_pos;) {
> 
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index ebb9701..8469bd5 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -43,7 +43,6 @@
> 
> #include "ink_config.h"
> #include "ink_platform.h"
> -#include "ink_aiocb.h"
> #include "ink_align.h"
> #include "ink_apidefs.h"
> #include "ink_args.h"
>