You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@trafficserver.apache.org by James Peach <jp...@apache.org> on 2013/04/23 23:07:39 UTC
Re: git commit: TS-1817: use the libaio interface to the Linux AIO
system call
Weijin,
Can you please review and make sure I didn't break anything? This still needs the change from TS-1821 ...
J
On Apr 23, 2013, at 2:03 PM, jpeach@apache.org wrote:
> Updated Branches:
> refs/heads/master af7057e32 -> cade10bd7
>
>
> TS-1817: use the libaio interface to the Linux AIO system call
>
>
> Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
> Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/cade10bd
> Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/cade10bd
> Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/cade10bd
>
> Branch: refs/heads/master
> Commit: cade10bd7268d12806f97da59ead52c6901fe8bd
> Parents: af7057e
> Author: James Peach <jp...@apache.org>
> Authored: Tue Apr 16 09:57:53 2013 -0700
> Committer: James Peach <jp...@apache.org>
> Committed: Tue Apr 23 13:59:21 2013 -0700
>
> ----------------------------------------------------------------------
> CHANGES | 2 +
> configure.ac | 21 +++++--
> contrib/manifests/debian.pp | 2 +-
> contrib/manifests/redhat.pp | 2 +-
> iocore/aio/AIO.cc | 16 +++---
> iocore/aio/I_AIO.h | 111 +++++++++++---------------------------
> iocore/aio/P_AIO.h | 6 ++-
> iocore/cache/Cache.cc | 4 +-
> iocore/cache/CacheWrite.cc | 7 ++-
> lib/ts/libts.h | 1 -
> 10 files changed, 68 insertions(+), 104 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/CHANGES
> ----------------------------------------------------------------------
> diff --git a/CHANGES b/CHANGES
> index 5820c6a..8f89319 100644
> --- a/CHANGES
> +++ b/CHANGES
> @@ -2,6 +2,8 @@
> Changes with Apache Traffic Server 3.3.3
>
>
> + *) [TS-1817] Use the libaio interface to the Linux AIO system calls.
> +
> *) [TS-1721] Integrate tstop into the autotools build.
>
> *) [TS-1706] Fix documentation for Config::Records.pm.
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/configure.ac
> ----------------------------------------------------------------------
> diff --git a/configure.ac b/configure.ac
> index 591c1b5..0c619e6 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -423,12 +423,21 @@ AC_ARG_ENABLE([linux-native-aio],
> [enable_linux_native_aio=no]
> )
>
> -case $host_os in
> - linux*);;
> - *) AS_IF([test "x$enable_linux_native_aio" = "xyes"], [
> - AC_MSG_ERROR([Linux native AIO can only be enabled on Linux systems])
> - ])
> -esac
> +AS_IF([test "x$enable_linux_native_aio" = "xyes"], [
> + case $host_os in
> + linux*) ;;
> + *) AC_MSG_ERROR([Linux native AIO can only be enabled on Linux systems])
> + esac
> +
> + AC_CHECK_HEADERS([libaio.h], [],
> + [AC_MSG_ERROR([Linux native AIO requires libaio.h])]
> + )
> +
> + AC_SEARCH_LIBS([io_submit], [aio], [],
> + [AC_MSG_ERROR([Linux native AIO requires libaio])]
> + )
> +
> +])
>
> AC_MSG_RESULT([$enable_linux_native_aio])
> TS_ARG_ENABLE_VAR([use], [linux_native_aio])
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/contrib/manifests/debian.pp
> ----------------------------------------------------------------------
> diff --git a/contrib/manifests/debian.pp b/contrib/manifests/debian.pp
> index 38b78ff..e22c26c 100644
> --- a/contrib/manifests/debian.pp
> +++ b/contrib/manifests/debian.pp
> @@ -18,7 +18,7 @@
> package {[
> 'gcc', 'g++', 'automake', 'autoconf', 'libtool', 'pkg-config',
> 'libssl-dev', 'tcl-dev', 'libexpat1-dev', 'libpcre3-dev', 'libhwloc-dev',
> - 'libcurl3-dev', 'libncurses5-dev',
> + 'libcurl3-dev', 'libncurses5-dev', 'libaio-dev',
> 'libcap-dev', 'libcap2', 'bison', 'flex', 'make',
> ]:
> ensure => latest
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/contrib/manifests/redhat.pp
> ----------------------------------------------------------------------
> diff --git a/contrib/manifests/redhat.pp b/contrib/manifests/redhat.pp
> index 0fae61a..6bb1f28 100644
> --- a/contrib/manifests/redhat.pp
> +++ b/contrib/manifests/redhat.pp
> @@ -18,7 +18,7 @@
> package {[
> 'gcc', 'automake', 'autoconf', 'libtool', 'pkgconfig',
> 'openssl-devel', 'tcl-devel', 'expat-devel', 'pcre-devel',
> - 'ncurses-devel', 'libcurl-devel',
> + 'ncurses-devel', 'libcurl-devel', 'libaio-devel',
> 'hwloc-devel', 'libcap-devel', 'bison', 'flex', 'make',
> ]:
> ensure => latest
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/AIO.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/AIO.cc b/iocore/aio/AIO.cc
> index 40bf1b1..769ade6 100644
> --- a/iocore/aio/AIO.cc
> +++ b/iocore/aio/AIO.cc
> @@ -599,8 +599,8 @@ Lagain:
> int
> ink_aio_read(AIOCallback *op, int fromAPI) {
> op->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> - op->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
> - op->aiocb.aio_data = op;
> + op->aiocb.aio_lio_opcode = IO_CMD_PREAD;
> + op->aiocb.data = op;
> this_ethread()->diskHandler->ready_list.enqueue(op);
>
> return 1;
> @@ -609,8 +609,8 @@ ink_aio_read(AIOCallback *op, int fromAPI) {
> int
> ink_aio_write(AIOCallback *op, int fromAPI) {
> op->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> - op->aiocb.aio_lio_opcode = IOCB_CMD_PWRITE;
> - op->aiocb.aio_data = op;
> + op->aiocb.aio_lio_opcode = IO_CMD_PWRITE;
> + op->aiocb.data = op;
> this_ethread()->diskHandler->ready_list.enqueue(op);
>
> return 1;
> @@ -624,8 +624,8 @@ ink_aio_readv(AIOCallback *op, int fromAPI) {
>
> while (io) {
> io->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> - io->aiocb.aio_lio_opcode = IOCB_CMD_PREAD;
> - io->aiocb.aio_data = io;
> + io->aiocb.aio_lio_opcode = IO_CMD_PREAD;
> + io->aiocb.data = io;
> dh->ready_list.enqueue(io);
> ++sz;
> io = io->then;
> @@ -651,8 +651,8 @@ ink_aio_writev(AIOCallback *op, int fromAPI) {
>
> while (io) {
> io->aiocb.aio_reqprio = AIO_DEFAULT_PRIORITY;
> - io->aiocb.aio_lio_opcode = IOCB_CMD_PWRITE;
> - io->aiocb.aio_data = io;
> + io->aiocb.aio_lio_opcode = IO_CMD_PWRITE;
> + io->aiocb.data = io;
> dh->ready_list.enqueue(io);
> ++sz;
> io = io->then;
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/I_AIO.h
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/I_AIO.h b/iocore/aio/I_AIO.h
> index ef49d93..00af2b3 100644
> --- a/iocore/aio/I_AIO.h
> +++ b/iocore/aio/I_AIO.h
> @@ -54,88 +54,22 @@
> #define AIO_MODE AIO_MODE_THREAD
> #endif
>
> -#if AIO_MODE == AIO_MODE_NATIVE
> -
> -#include <sys/syscall.h> /* for __NR_* definitions */
> -#include <linux/aio_abi.h> /* for AIO types and constants */
> -#define MAX_AIO_EVENTS 1024
> -
> -#if defined(__LITTLE_ENDIAN)
> -#if (SIZEOF_VOID_POINTER == 4)
> -#define PADDEDPtr(x, y) x; unsigned y
> -#define PADDEDul(x, y) unsigned long x; unsigned y
> -#elif (SIZEOF_VOID_POINTER == 8)
> -#define PADDEDPtr(x, y) x
> -#define PADDEDul(x, y) unsigned long x
> -#endif
> -#elif defined(__BIG_ENDIAN)
> -#if (SIZEOF_VOID_POINTER == 4)
> -#define PADDEDPtr(x, y) unsigned y; x
> -#define PADDEDul(x, y) unsigned y; unsigned long y
> -#elif (SIZEOF_VOID_POINTER == 8)
> -#define PADDEDPtr(x, y) x
> -#define PADDEDul(x, y) unsigned long x
> -#endif
> -#else
> -#error edit for your odd byteorder.
> -#endif
> -
> -typedef struct ink_iocb {
> - /* these are internal to the kernel/libc. */
> - PADDEDPtr(void *aio_data, _pad1); /* data to be returned in event's data */
> - unsigned PADDED(aio_key, aio_reserved1);
> - /* the kernel sets aio_key to the req # */
> -
> - /* common fields */
> - short aio_lio_opcode; /* see IOCB_CMD_ above */
> - short aio_reqprio;
> - int aio_fildes;
> -
> - PADDEDPtr(void *aio_buf, _pad2);
> - PADDEDul(aio_nbytes, _pad3);
> - int64_t aio_offset;
> -
> - /* extra parameters */
> - uint64_t aio_reserved2; /* TODO: use this for a (struct sigevent *) */
> -
> - /* flags for the "struct iocb" */
> - int aio_flags;
> -
> - /*
> - * if the IOCB_FLAG_RESFD flag of "aio_flags" is set, this is an
> - * eventfd to signal AIO readiness to
> - */
> - int aio_resfd;
> -
> -} ink_aiocb_t;
> +#define LIO_READ 0x1
> +#define LIO_WRITE 0x2
>
> -typedef struct ink_io_event {
> - PADDEDPtr(void *data, _pad1); /* the data field from the iocb */
> - PADDEDPtr(ink_aiocb_t *obj, _pad2); /* what iocb this event came from */
> - PADDEDul(res, _pad3); /* result code for this event */
> - PADDEDul(res2, _pad4); /* secondary result */
> -} ink_io_event_t;
> +#if AIO_MODE == AIO_MODE_NATIVE
>
> -TS_INLINE int io_setup(unsigned nr, aio_context_t *ctxp)
> -{
> - return syscall(__NR_io_setup, nr, ctxp);
> -}
> +#include <libaio.h>
>
> -TS_INLINE int io_destroy(aio_context_t ctx)
> -{
> - return syscall(__NR_io_destroy, ctx);
> -}
> +#define MAX_AIO_EVENTS 1024
>
> -TS_INLINE int io_submit(aio_context_t ctx, long nr, ink_aiocb_t **iocbpp)
> -{
> - return syscall(__NR_io_submit, ctx, nr, iocbpp);
> -}
> +typedef struct iocb ink_aiocb_t;
> +typedef struct io_event ink_io_event_t;
>
> -TS_INLINE int io_getevents(aio_context_t ctx, long min_nr, long max_nr,
> - ink_io_event_t *events, struct timespec *timeout)
> -{
> - return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
> -}
> +// XXX hokey old-school compatibility with ink_aiocb.h ...
> +#define aio_nbytes u.c.nbytes
> +#define aio_offset u.c.offset
> +#define aio_buf u.c.buf
>
> struct AIOVec: public Continuation
> {
> @@ -151,10 +85,26 @@ struct AIOVec: public Continuation
>
> int mainEvent(int event, Event *e);
> };
> +
> #else
> -typedef ink_aiocb ink_aiocb_t;
> +
> +typedef struct ink_aiocb
> +{
> + int aio_fildes;
> + volatile void *aio_buf; /* buffer location */
> + size_t aio_nbytes; /* length of transfer */
> + off_t aio_offset; /* file offset */
> +
> + int aio_reqprio; /* request priority offset */
> + int aio_lio_opcode; /* listio operation */
> + int aio_state; /* state flag for List I/O */
> + int aio__pad[1]; /* extension padding */
> +} ink_aiocb_t;
> +
> bool ink_aio_thread_num_set(int thread_num);
> +
> #endif
> +
> // AIOCallback::thread special values
> #define AIO_CALLBACK_THREAD_ANY ((EThread*)0) // any regular event thread
> #define AIO_CALLBACK_THREAD_AIO ((EThread*)-1)
> @@ -182,7 +132,7 @@ struct AIOCallback: public Continuation
> struct DiskHandler: public Continuation
> {
> Event *trigger_event;
> - aio_context_t ctx;
> + io_context_t ctx;
> ink_io_event_t events[MAX_AIO_EVENTS];
> Que(AIOCallback, link) ready_list;
> Que(AIOCallback, link) complete_list;
> @@ -190,7 +140,7 @@ struct DiskHandler: public Continuation
> int mainAIOEvent(int event, Event *e);
> DiskHandler() {
> SET_HANDLER(&DiskHandler::startAIOEvent);
> - memset(&ctx, 0, sizeof(aio_context_t));
> + memset(&ctx, 0, sizeof(ctx));
> int ret = io_setup(MAX_AIO_EVENTS, &ctx);
> if (ret < 0) {
> perror("io_setup error");
> @@ -198,6 +148,7 @@ struct DiskHandler: public Continuation
> }
> };
> #endif
> +
> void ink_aio_init(ModuleVersion version);
> int ink_aio_start();
> void ink_aio_set_callback(Continuation * error_callback);
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/aio/P_AIO.h
> ----------------------------------------------------------------------
> diff --git a/iocore/aio/P_AIO.h b/iocore/aio/P_AIO.h
> index 9e3321c..fe91132 100644
> --- a/iocore/aio/P_AIO.h
> +++ b/iocore/aio/P_AIO.h
> @@ -78,7 +78,7 @@ AIOCallbackInternal::io_complete(int event, void *data)
> }
>
> TS_INLINE int
> -AIOVec::mainEvent(int event, Event *e) {
> +AIOVec::mainEvent(int /* event */, Event *) {
> ++completed;
> if (completed < size)
> return EVENT_CONT;
> @@ -92,7 +92,9 @@ AIOVec::mainEvent(int event, Event *e) {
> ink_assert(!"AIOVec mainEvent err");
> return EVENT_ERROR;
> }
> -#else
> +
> +#else /* AIO_MODE != AIO_MODE_NATIVE */
> +
> struct AIO_Reqs;
>
> struct AIOCallbackInternal: public AIOCallback
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/cache/Cache.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/Cache.cc b/iocore/cache/Cache.cc
> index 4173f3d..3971cb1 100644
> --- a/iocore/cache/Cache.cc
> +++ b/iocore/cache/Cache.cc
> @@ -1964,7 +1964,7 @@ CacheVC::handleReadDone(int event, Event *e) {
> if ((!dir_valid(vol, &dir)) || (!io.ok())) {
> if (!io.ok()) {
> Debug("cache_disk_error", "Read error on disk %s\n \
> - read range : [%" PRIu64 " - %" PRIu64 " bytes] [%" PRIu64 " - %" PRIu64 " blocks] \n", vol->hash_id, io.aiocb.aio_offset, io.aiocb.aio_offset + io.aiocb.aio_nbytes, io.aiocb.aio_offset / 512, (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / 512);
> + read range : [%" PRIu64 " - %" PRIu64 " bytes] [%" PRIu64 " - %" PRIu64 " blocks] \n", vol->hash_id, (uint64_t)io.aiocb.aio_offset, (uint64_t)io.aiocb.aio_offset + io.aiocb.aio_nbytes, (uint64_t)io.aiocb.aio_offset / 512, (uint64_t)(io.aiocb.aio_offset + io.aiocb.aio_nbytes) / 512);
> }
> goto Ldone;
> }
> @@ -2007,7 +2007,7 @@ CacheVC::handleReadDone(int event, Event *e) {
> if (checksum != doc->checksum) {
> Note("cache: checksum error for [%" PRIu64 " %" PRIu64 "] len %d, hlen %d, disk %s, offset %" PRIu64 " size %zu",
> doc->first_key.b[0], doc->first_key.b[1],
> - doc->len, doc->hlen, vol->path, io.aiocb.aio_offset, (size_t)io.aiocb.aio_nbytes);
> + doc->len, doc->hlen, vol->path, (uint64_t)io.aiocb.aio_offset, (size_t)io.aiocb.aio_nbytes);
> doc->magic = DOC_CORRUPT;
> okay = 0;
> }
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/iocore/cache/CacheWrite.cc
> ----------------------------------------------------------------------
> diff --git a/iocore/cache/CacheWrite.cc b/iocore/cache/CacheWrite.cc
> index dee62bd..ae87948 100644
> --- a/iocore/cache/CacheWrite.cc
> +++ b/iocore/cache/CacheWrite.cc
> @@ -330,9 +330,10 @@ Vol::aggWriteDone(int event, Event *e)
> // for fragments is this aggregation buffer
> Debug("cache_disk_error", "Write error on disk %s\n \
> write range : [%" PRIu64 " - %" PRIu64 " bytes] [%" PRIu64 " - %" PRIu64 " blocks] \n",
> - hash_id, io.aiocb.aio_offset, io.aiocb.aio_offset + io.aiocb.aio_nbytes,
> - io.aiocb.aio_offset / CACHE_BLOCK_SIZE,
> - (io.aiocb.aio_offset + io.aiocb.aio_nbytes) / CACHE_BLOCK_SIZE);
> + hash_id, (uint64_t)io.aiocb.aio_offset,
> + (uint64_t)io.aiocb.aio_offset + io.aiocb.aio_nbytes,
> + (uint64_t)io.aiocb.aio_offset / CACHE_BLOCK_SIZE,
> + (uint64_t)(io.aiocb.aio_offset + io.aiocb.aio_nbytes) / CACHE_BLOCK_SIZE);
> Dir del_dir;
> dir_clear(&del_dir);
> for (int done = 0; done < agg_buf_pos;) {
>
> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/cade10bd/lib/ts/libts.h
> ----------------------------------------------------------------------
> diff --git a/lib/ts/libts.h b/lib/ts/libts.h
> index ebb9701..8469bd5 100644
> --- a/lib/ts/libts.h
> +++ b/lib/ts/libts.h
> @@ -43,7 +43,6 @@
>
> #include "ink_config.h"
> #include "ink_platform.h"
> -#include "ink_aiocb.h"
> #include "ink_align.h"
> #include "ink_apidefs.h"
> #include "ink_args.h"
>