Posted to commits@kudu.apache.org by al...@apache.org on 2020/03/26 20:54:19 UTC

[kudu] branch master updated: gutil: bump up linux-syscall-support.h

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a46332  gutil: bump up linux-syscall-support.h
0a46332 is described below

commit 0a46332b2129cb256cf9d55c091643d96267724f
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Fri Mar 20 09:59:05 2020 +0000

    gutil: bump up linux-syscall-support.h
    
    Bump up linux-syscall-support.h to the newest version
    which supports aarch64 - commit fd00dbb from
    https://chromium.googlesource.com/linux-syscall-support/
    
    This is a clean application of IMPALA-9538 (modulo
    the paths of the source files being different).
    
    Change-Id: I6c46acb17f048890a3f93fc6b910b2df3c1a7058
    Reviewed-on: http://gerrit.cloudera.org:8080/15557
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/gutil/linux_syscall_support.h | 2722 +++++++++++++++++++++-----------
 src/kudu/gutil/spinlock_linux-inl.h    |   18 +-
 src/kudu/util/debug-util.cc            |    3 +-
 src/kudu/util/notification.h           |    4 +-
 4 files changed, 1801 insertions(+), 946 deletions(-)
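
For context, linux-syscall-support.h provides thin syscall wrappers that bypass
libc, with a configurable name prefix (sys_ by default, via LSS_NAME). Files
touched by this commit, such as src/kudu/util/debug-util.cc, include it so they
can issue raw system calls in contexts where the libc wrappers cannot be
trusted, for example after other threads have been suspended. The sketch below
is illustrative only and is not part of the commit; it assumes the default
sys_ prefix and uses wrappers (sys_write, sys_gettid) that the header defines
on the supported architectures.

    // Minimal sketch: async-signal-safe output through the sys_ wrappers.
    // Not taken from the Kudu tree; the function name here is hypothetical.
    #include <unistd.h>        // STDERR_FILENO
    #include <sys/types.h>     // pid_t

    #include "kudu/gutil/linux_syscall_support.h"

    // Intended to be called from a fatal-signal handler, where libc's
    // locking wrappers could deadlock if a thread was suspended mid-call.
    void DumpFatalSignalNotice() {
      static const char kMsg[] = "caught fatal signal\n";
      // Direct write(2): no libc buffering, no locks taken.
      sys_write(STDERR_FILENO, kMsg, sizeof(kMsg) - 1);
      // Direct gettid(2): identifies the crashing thread.
      pid_t tid = sys_gettid();
      (void)tid;
    }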

diff --git a/src/kudu/gutil/linux_syscall_support.h b/src/kudu/gutil/linux_syscall_support.h
index 5476d0b..a177cfb 100644
--- a/src/kudu/gutil/linux_syscall_support.h
+++ b/src/kudu/gutil/linux_syscall_support.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2005-2008, Google Inc.
+/* Copyright (c) 2005-2011, Google Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -66,6 +66,15 @@
  *   results in prefixes "sys[0..9]_". It is also possible to set this
  *   macro to -1, which avoids all prefixes.
  *
+ * SYS_SYSCALL_ENTRYPOINT:
+ *   Some applications (such as sandboxes that filter system calls), need
+ *   to be able to run custom-code each time a system call is made. If this
+ *   macro is defined, it expands to the name of a "common" symbol. If
+ *   this symbol is assigned a non-NULL pointer value, it is used as the
+ *   address of the system call entrypoint.
+ *   A pointer to this symbol can be obtained by calling
+ *   get_syscall_entrypoint()
+ *
  * This file defines a few internal symbols that all start with "LSS_".
  * Do not access these symbols from outside this file. They are not part
  * of the supported API.
@@ -73,11 +82,14 @@
 #ifndef SYS_LINUX_SYSCALL_SUPPORT_H
 #define SYS_LINUX_SYSCALL_SUPPORT_H
 
-/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
+/* We currently only support x86-32, x86-64, ARM, MIPS, PPC, s390 and s390x
+ * on Linux.
  * Porting to other related platforms should not be difficult.
  */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-     defined(__mips__) || defined(__PPC__)) && defined(__linux)
+#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) ||   \
+     defined(__mips__) || defined(__PPC__) || defined(__ARM_EABI__) || \
+     defined(__aarch64__) || defined(__s390__)) \
+  && (defined(__linux) || defined(__ANDROID__))
 
 #ifndef SYS_CPLUSPLUS
 #ifdef __cplusplus
@@ -89,23 +101,76 @@ extern "C" {
 #endif
 
 #include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
 #include <signal.h>
 #include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
 #include <string.h>
 #include <sys/ptrace.h>
 #include <sys/resource.h>
 #include <sys/time.h>
 #include <sys/types.h>
-#include <syscall.h>
+#include <sys/syscall.h>
 #include <unistd.h>
 #include <linux/unistd.h>
 #include <endian.h>
 
 #ifdef __mips__
 /* Include definitions of the ABI currently in use.                          */
+#ifdef __ANDROID__
+/* Android doesn't have sgidefs.h, but does have asm/sgidefs.h,
+ * which has the definitions we need.
+ */
+#include <asm/sgidefs.h>
+#else
 #include <sgidefs.h>
 #endif
+#endif
+#endif
 
+/* Some libcs, for example Android NDK and musl, #define these
+ * macros as aliases to their non-64 counterparts. To avoid naming
+ * conflict, remove them.
+ *
+ * These are restored by the corresponding #pragma pop_macro near
+ * the end of this file.
+ */
+#pragma push_macro("stat64")
+#pragma push_macro("fstat64")
+#pragma push_macro("lstat64")
+#pragma push_macro("pread64")
+#pragma push_macro("pwrite64")
+#pragma push_macro("getdents64")
+#undef stat64
+#undef fstat64
+#undef lstat64
+#undef pread64
+#undef pwrite64
+#undef getdents64
+
+#if defined(__ANDROID__) && defined(__x86_64__)
+// A number of x86_64 syscalls are blocked by seccomp on recent Android;
+// undefine them so that modern alternatives will be used instead where
+// possible.
+// The alternative syscalls have been sanity checked against linux-3.4+;
+// older versions might not work.
+# undef __NR_getdents
+# undef __NR_dup2
+# undef __NR_fork
+# undef __NR_getpgrp
+# undef __NR_open
+# undef __NR_poll
+# undef __NR_readlink
+# undef __NR_stat
+# undef __NR_unlink
+# undef __NR_pipe
+#endif
+
+#if defined(__ANDROID__)
+// waitpid is blocked by seccomp on all architectures on recent Android.
+# undef __NR_waitpid
 #endif
 
 /* As glibc often provides subtly incompatible data structures (and implicit
@@ -147,12 +212,17 @@ struct kernel_dirent64 {
 };
 
 /* include/linux/dirent.h                                                    */
+#if !defined(__NR_getdents)
+// when getdents is not available, getdents64 is used for both.
+#define kernel_dirent kernel_dirent64
+#else
 struct kernel_dirent {
   long               d_ino;
   long               d_off;
   unsigned short     d_reclen;
   char               d_name[256];
 };
+#endif
 
 /* include/linux/uio.h                                                       */
 struct kernel_iovec {
@@ -216,26 +286,14 @@ struct kernel_rusage {
   long               ru_nivcsw;
 };
 
-/* include/linux/capablilty.h                                                */
-struct kernel_cap_user_header {
-  unsigned int version;
-  int pid;
-};
-
-struct kernel_cap_user_data {
-  unsigned int effective;
-  unsigned int permitted;
-  unsigned int inheritable;
-};
-
-struct siginfo;
-#if defined(__i386__) || defined(__arm__) || defined(__PPC__)
+#if defined(__i386__) || defined(__ARM_EABI__) || defined(__ARM_ARCH_3__) \
+  || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__))
 
 /* include/asm-{arm,i386,mips,ppc}/signal.h                                  */
 struct kernel_old_sigaction {
   union {
     void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
   };
   unsigned long      sa_mask;
   unsigned long      sa_flags;
@@ -243,6 +301,8 @@ struct kernel_old_sigaction {
 } __attribute__((packed,aligned(4)));
 #elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
   #define kernel_old_sigaction kernel_sigaction
+#elif defined(__aarch64__)
+  // No kernel_old_sigaction defined for arm64.
 #endif
 
 /* Some kernel functions (e.g. sigaction() in 2.6.23) require that the
@@ -260,7 +320,7 @@ struct kernel_old_sigaction {
 #define KERNEL_NSIG  64
 #endif
 
-/* include/asm-{arm,i386,mips,x86_64}/signal.h                               */
+/* include/asm-{arm,aarch64,i386,mips,x86_64}/signal.h                       */
 struct kernel_sigset_t {
   unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/
                     (8*sizeof(unsigned long))];
@@ -272,13 +332,13 @@ struct kernel_sigaction {
   unsigned long      sa_flags;
   union {
     void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
   };
   struct kernel_sigset_t sa_mask;
 #else
   union {
     void             (*sa_handler_)(int);
-    void             (*sa_sigaction_)(int, struct siginfo *, void *);
+    void             (*sa_sigaction_)(int, siginfo_t *, void *);
   };
   unsigned long      sa_flags;
   void               (*sa_restorer)(void);
@@ -292,7 +352,7 @@ struct kernel_sockaddr {
   char               sa_data[14];
 };
 
-/* include/asm-{arm,i386,mips,ppc}/stat.h                                    */
+/* include/asm-{arm,aarch64,i386,mips,ppc,s390}/stat.h                       */
 #ifdef __mips__
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 struct kernel_stat {
@@ -319,7 +379,7 @@ struct kernel_stat64 {
   unsigned           __pad2;
   unsigned long long st_blocks;
 };
-#elif defined __PPC__ && !defined __PPC64__
+#elif defined __PPC__
 struct kernel_stat64 {
   unsigned long long st_dev;
   unsigned long long st_ino;
@@ -365,8 +425,8 @@ struct kernel_stat64 {
 };
 #endif
 
-/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h                             */
-#if defined(__i386__) || defined(__arm__)
+/* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/stat.h                */
+#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__)
 struct kernel_stat {
   /* The kernel headers suggest that st_dev and st_rdev should be 32bit
    * quantities encoding 12bit major and 20bit minor numbers in an interleaved
@@ -396,39 +456,34 @@ struct kernel_stat {
 };
 #elif defined(__x86_64__)
 struct kernel_stat {
-  unsigned long      st_dev;
-  unsigned long      st_ino;
-  unsigned long      st_nlink;
+  uint64_t           st_dev;
+  uint64_t           st_ino;
+  uint64_t           st_nlink;
   unsigned           st_mode;
   unsigned           st_uid;
   unsigned           st_gid;
   unsigned           __pad0;
-  unsigned long      st_rdev;
-  long               st_size;
-  long               st_blksize;
-  long               st_blocks;
-  unsigned long      st_atime_;
-  unsigned long      st_atime_nsec_;
-  unsigned long      st_mtime_;
-  unsigned long      st_mtime_nsec_;
-  unsigned long      st_ctime_;
-  unsigned long      st_ctime_nsec_;
-  long               __unused[3];
+  uint64_t           st_rdev;
+  int64_t            st_size;
+  int64_t            st_blksize;
+  int64_t            st_blocks;
+  uint64_t           st_atime_;
+  uint64_t           st_atime_nsec_;
+  uint64_t           st_mtime_;
+  uint64_t           st_mtime_nsec_;
+  uint64_t           st_ctime_;
+  uint64_t           st_ctime_nsec_;
+  int64_t            __unused4[3];
 };
 #elif defined(__PPC__)
 struct kernel_stat {
-  unsigned long      st_dev;
+  unsigned           st_dev;
   unsigned long      st_ino;      // ino_t
-#ifdef __PPC64__
-  unsigned long      st_nlink;    // nlink_t
-  unsigned int       st_mode;     // mode_t
-#else
-  unsigned int       st_mode;     // mode_t
+  unsigned long      st_mode;     // mode_t
   unsigned short     st_nlink;    // nlink_t
-#endif
-  unsigned int       st_uid;      // uid_t
-  unsigned int       st_gid;      // gid_t
-  unsigned long      st_rdev;
+  unsigned           st_uid;      // uid_t
+  unsigned           st_gid;      // gid_t
+  unsigned           st_rdev;
   long               st_size;     // off_t
   unsigned long      st_blksize;
   unsigned long      st_blocks;
@@ -440,9 +495,6 @@ struct kernel_stat {
   unsigned long      st_ctime_nsec_;
   unsigned long      __unused4;
   unsigned long      __unused5;
-#ifdef __PPC64__
-  unsigned long      __unused6;
-#endif
 };
 #elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
 struct kernel_stat {
@@ -467,9 +519,76 @@ struct kernel_stat {
   int                st_blocks;
   int                st_pad4[14];
 };
+#elif defined(__aarch64__)
+struct kernel_stat {
+  unsigned long      st_dev;
+  unsigned long      st_ino;
+  unsigned int       st_mode;
+  unsigned int       st_nlink;
+  unsigned int       st_uid;
+  unsigned int       st_gid;
+  unsigned long      st_rdev;
+  unsigned long      __pad1;
+  long               st_size;
+  int                st_blksize;
+  int                __pad2;
+  long               st_blocks;
+  long               st_atime_;
+  unsigned long      st_atime_nsec_;
+  long               st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  long               st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned int       __unused4;
+  unsigned int       __unused5;
+};
+#elif defined(__s390x__)
+struct kernel_stat {
+  unsigned long      st_dev;
+  unsigned long      st_ino;
+  unsigned long      st_nlink;
+  unsigned int       st_mode;
+  unsigned int       st_uid;
+  unsigned int       st_gid;
+  unsigned int       __pad1;
+  unsigned long      st_rdev;
+  unsigned long      st_size;
+  unsigned long      st_atime_;
+  unsigned long      st_atime_nsec_;
+  unsigned long      st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  unsigned long      st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned long      st_blksize;
+  long               st_blocks;
+  unsigned long      __unused[3];
+};
+#elif defined(__s390__)
+struct kernel_stat {
+  unsigned short     st_dev;
+  unsigned short     __pad1;
+  unsigned long      st_ino;
+  unsigned short     st_mode;
+  unsigned short     st_nlink;
+  unsigned short     st_uid;
+  unsigned short     st_gid;
+  unsigned short     st_rdev;
+  unsigned short     __pad2;
+  unsigned long      st_size;
+  unsigned long      st_blksize;
+  unsigned long      st_blocks;
+  unsigned long      st_atime_;
+  unsigned long      st_atime_nsec_;
+  unsigned long      st_mtime_;
+  unsigned long      st_mtime_nsec_;
+  unsigned long      st_ctime_;
+  unsigned long      st_ctime_nsec_;
+  unsigned long      __unused4;
+  unsigned long      __unused5;
+};
 #endif
 
-/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h                           */
+/* include/asm-{arm,aarch64,i386,mips,x86_64,ppc,s390}/statfs.h              */
 #ifdef __mips__
 #if _MIPS_SIM != _MIPS_SIM_ABI64
 struct kernel_statfs64 {
@@ -487,6 +606,22 @@ struct kernel_statfs64 {
   unsigned long      f_spare[6];
 };
 #endif
+#elif defined(__s390__)
+/* See also arch/s390/include/asm/compat.h                                   */
+struct kernel_statfs64 {
+  unsigned int       f_type;
+  unsigned int       f_bsize;
+  unsigned long long f_blocks;
+  unsigned long long f_bfree;
+  unsigned long long f_bavail;
+  unsigned long long f_files;
+  unsigned long long f_ffree;
+  struct { int val[2]; } f_fsid;
+  unsigned int       f_namelen;
+  unsigned int       f_frsize;
+  unsigned int       f_flags;
+  unsigned int       f_spare[4];
+};
 #elif !defined(__x86_64__)
 struct kernel_statfs64 {
   unsigned long      f_type;
@@ -503,7 +638,7 @@ struct kernel_statfs64 {
 };
 #endif
 
-/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h                   */
+/* include/asm-{arm,i386,mips,x86_64,ppc,generic,s390}/statfs.h              */
 #ifdef __mips__
 struct kernel_statfs {
   long               f_type;
@@ -518,11 +653,40 @@ struct kernel_statfs {
   long               f_namelen;
   long               f_spare[6];
 };
-#else
+#elif defined(__x86_64__)
 struct kernel_statfs {
   /* x86_64 actually defines all these fields as signed, whereas all other  */
   /* platforms define them as unsigned. Leaving them at unsigned should not */
-  /* cause any problems.                                                    */
+  /* cause any problems. Make sure these are 64-bit even on x32.            */
+  uint64_t           f_type;
+  uint64_t           f_bsize;
+  uint64_t           f_blocks;
+  uint64_t           f_bfree;
+  uint64_t           f_bavail;
+  uint64_t           f_files;
+  uint64_t           f_ffree;
+  struct { int val[2]; } f_fsid;
+  uint64_t           f_namelen;
+  uint64_t           f_frsize;
+  uint64_t           f_spare[5];
+};
+#elif defined(__s390__)
+struct kernel_statfs {
+  unsigned int       f_type;
+  unsigned int       f_bsize;
+  unsigned long      f_blocks;
+  unsigned long      f_bfree;
+  unsigned long      f_bavail;
+  unsigned long      f_files;
+  unsigned long      f_ffree;
+  struct { int val[2]; } f_fsid;
+  unsigned int       f_namelen;
+  unsigned int       f_frsize;
+  unsigned int       f_flags;
+  unsigned int       f_spare[4];
+};
+#else
+struct kernel_statfs {
   unsigned long      f_type;
   unsigned long      f_bsize;
   unsigned long      f_blocks;
@@ -537,52 +701,10 @@ struct kernel_statfs {
 };
 #endif
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-    defined(__PPC__)
-/* include/linux/aio_abi.h                                                   */
-/* Layout depends on big/little endian.                                      */
-struct kernel_iocb {
-  unsigned long long aio_data;
-  unsigned int       aio_key;
-  unsigned int       aio_reserved;
-  unsigned short     aio_lio_opcode;
-           short     aio_reqprio;
-  unsigned int       aio_filedes;
-  unsigned long long aio_buf;
-  unsigned long long aio_nbytes;
-  unsigned long long aio_offset;
-  unsigned long long aio_reserved2;
-  unsigned int       aio_flags;
-  unsigned int       aio_resfd;
-};
-#elif defined(__PPC__)
-struct kernel_iocb {
-  unsigned long long aio_data;
-  unsigned int       aio_reserved;
-  unsigned int       aio_key;
-  unsigned short     aio_lio_opcode;
-           short     aio_reqprio;
-  unsigned int       aio_fildes;
-  unsigned long long aio_buf;
-  unsigned long long aio_nbytes;
-  unsigned long long aio_offset;
-  unsigned long long aio_reserved2;
-  unsigned int       aio_flags;
-  unsigned int       aio_resfd;
-};
-#endif
-
-/* include/linux/aio_abi.h                                                   */
-struct kernel_io_event {
-  unsigned long long data;
-  unsigned long long obj;
-           long long res;
-           long long res2;
-};
 
 /* Definitions missing from the standard header files                        */
 #ifndef O_DIRECTORY
-#if defined(__arm__) || defined(__PPC_)
+#if defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || defined(__aarch64__)
 #define O_DIRECTORY             0040000
 #else
 #define O_DIRECTORY             0200000
@@ -635,11 +757,12 @@ struct kernel_io_event {
 #endif
 #ifndef MAKE_PROCESS_CPUCLOCK
 #define MAKE_PROCESS_CPUCLOCK(pid, clock)                                     \
-        ((~(int)(pid) << 3) | (int)(clock))
+        ((int)(~(unsigned)(pid) << 3) | (int)(clock))
 #endif
 #ifndef MAKE_THREAD_CPUCLOCK
 #define MAKE_THREAD_CPUCLOCK(tid, clock)                                      \
-        ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
+        ((int)(~(unsigned)(tid) << 3) |                                       \
+         (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
 #endif
 
 #ifndef FUTEX_WAIT
@@ -711,12 +834,6 @@ struct kernel_io_event {
 #endif
 
 #if defined(__i386__)
-#ifndef __NR_mount
-#define __NR_mount               21
-#endif
-#ifndef __NR_setgroups32
-#define __NR_setgroups32         81
-#endif
 #ifndef __NR_quotactl
 #define __NR_quotactl           131
 #endif
@@ -795,13 +912,6 @@ struct kernel_io_event {
 #define __NR_sched_setaffinity  241
 #define __NR_sched_getaffinity  242
 #endif
-#ifndef __NR_io_setup
-#define __NR_io_setup           245
-#define __NR_io_destroy         246
-#define __NR_io_getevents       247
-#define __NR_io_submit          248
-#define __NR_io_cancel          249
-#endif
 #ifndef __NR_set_tid_address
 #define __NR_set_tid_address    258
 #endif
@@ -835,9 +945,6 @@ struct kernel_io_event {
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           301
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            310
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         317
 #endif
@@ -847,37 +954,8 @@ struct kernel_io_event {
 #ifndef __NR_fallocate
 #define __NR_fallocate          324
 #endif
-#ifndef __NR_preadv
-#define __NR_preadv             333
-#endif
-#ifndef __NR_pwritev
-#define __NR_pwritev            334
-#endif
-#ifndef __NR_setns
-#define __NR_setns              346
-#endif
 /* End of i386 definitions                                                   */
-#elif defined(__arm__)
-#ifndef __syscall
-#if defined(__thumb__) || defined(__ARM_EABI__)
-#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
-#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
-#define __syscall(name) "swi\t0"
-#define __syscall_safe(name)                     \
-  "push  {r7}\n"                                 \
-  "mov   r7,%[sysreg]\n"                         \
-  __syscall(name)"\n"                            \
-  "pop   {r7}"
-#else
-#define __SYS_REG(name)
-#define __SYS_REG_LIST(regs...) regs
-#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
-#define __syscall_safe(name) __syscall(name)
-#endif
-#endif
-#ifndef __NR_mount
-#define __NR_mount              (__NR_SYSCALL_BASE + 21)
-#endif
+#elif defined(__ARM_ARCH_3__) || defined(__ARM_EABI__)
 #ifndef __NR_setresuid
 #define __NR_setresuid          (__NR_SYSCALL_BASE + 164)
 #define __NR_getresuid          (__NR_SYSCALL_BASE + 165)
@@ -897,9 +975,6 @@ struct kernel_io_event {
 #ifndef __NR_pwrite64
 #define __NR_pwrite64           (__NR_SYSCALL_BASE + 181)
 #endif
-#ifndef __NR_capset
-#define __NR_capset             (__NR_SYSCALL_BASE + 185)
-#endif
 #ifndef __NR_ugetrlimit
 #define __NR_ugetrlimit         (__NR_SYSCALL_BASE + 191)
 #endif
@@ -909,9 +984,6 @@ struct kernel_io_event {
 #ifndef __NR_fstat64
 #define __NR_fstat64            (__NR_SYSCALL_BASE + 197)
 #endif
-#ifndef __NR_setgroups32
-#define __NR_setgroups32        (__NR_SYSCALL_BASE + 206)
-#endif
 #ifndef __NR_setresuid32
 #define __NR_setresuid32        (__NR_SYSCALL_BASE + 208)
 #define __NR_getresuid32        (__NR_SYSCALL_BASE + 209)
@@ -980,16 +1052,110 @@ struct kernel_io_event {
 #ifndef __NR_ioprio_get
 #define __NR_ioprio_get         (__NR_SYSCALL_BASE + 315)
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            (__NR_SYSCALL_BASE + 337)
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         (__NR_SYSCALL_BASE + 344)
 #endif
-#ifndef __NR_setns
-#define __NR_setns              (__NR_SYSCALL_BASE + 375)
+#ifndef __NR_getcpu
+#define __NR_getcpu             (__NR_SYSCALL_BASE + 345)
+#endif
+/* End of ARM 3/EABI definitions                                             */
+#elif defined(__aarch64__)
+#ifndef __NR_setxattr
+#define __NR_setxattr             5
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr            6
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr             8
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr            9
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr           11
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr          12
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set          30
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get          31
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat            35
+#endif
+#ifndef __NR_fallocate
+#define __NR_fallocate           47
+#endif
+#ifndef __NR_openat
+#define __NR_openat              56
+#endif
+#ifndef __NR_quotactl
+#define __NR_quotactl            60
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64          61
+#endif
+#ifndef __NR_getdents
+// when getdents is not available, getdents64 is used for both.
+#define __NR_getdents            __NR_getdents64
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64             67
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64            68
+#endif
+#ifndef __NR_ppoll
+#define __NR_ppoll               73
+#endif
+#ifndef __NR_readlinkat
+#define __NR_readlinkat          78
+#endif
+#ifndef __NR_newfstatat
+#define __NR_newfstatat          79
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address     96
 #endif
-/* End of ARM definitions                                                  */
+#ifndef __NR_futex
+#define __NR_futex               98
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      113
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       114
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  122
+#define __NR_sched_getaffinity  123
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              130
+#endif
+#ifndef __NR_setresuid
+#define __NR_setresuid          147
+#define __NR_getresuid          148
+#define __NR_setresgid          149
+#define __NR_getresgid          150
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             178
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          213
+#endif
+#ifndef __NR_fadvise64
+#define __NR_fadvise64          223
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         239
+#endif
+/* End of aarch64 definitions                                                */
 #elif defined(__x86_64__)
 #ifndef __NR_pread64
 #define __NR_pread64             17
@@ -1003,9 +1169,6 @@ struct kernel_io_event {
 #define __NR_setresgid          119
 #define __NR_getresgid          120
 #endif
-#ifndef __NR_mount
-#define __NR_mount              165
-#endif
 #ifndef __NR_quotactl
 #define __NR_quotactl           179
 #endif
@@ -1043,16 +1206,13 @@ struct kernel_io_event {
 #define __NR_sched_setaffinity  203
 #define __NR_sched_getaffinity  204
 #endif
-#ifndef __NR_io_setup
-#define __NR_io_setup           206
-#define __NR_io_destroy         207
-#define __NR_io_getevents       208
-#define __NR_io_submit          209
-#define __NR_io_cancel          210
-#endif
 #ifndef __NR_getdents64
 #define __NR_getdents64         217
 #endif
+#ifndef __NR_getdents
+// when getdents is not available, getdents64 is used for both.
+#define __NR_getdents           __NR_getdents64
+#endif
 #ifndef __NR_set_tid_address
 #define __NR_set_tid_address    218
 #endif
@@ -1080,30 +1240,15 @@ struct kernel_io_event {
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           263
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            272
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         279
 #endif
 #ifndef __NR_fallocate
 #define __NR_fallocate          285
 #endif
-#ifndef __NR_preadv
-#define __NR_preadv             295
-#endif
-#ifndef __NR_pwritev
-#define __NR_pwritev            296
-#endif
-#ifndef __NR_setns
-#define __NR_setns              308
-#endif
 /* End of x86-64 definitions                                                 */
 #elif defined(__mips__)
 #if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_mount
-#define __NR_mount              (__NR_Linux + 21)
-#endif
 #ifndef __NR_setresuid
 #define __NR_setresuid          (__NR_Linux + 185)
 #define __NR_getresuid          (__NR_Linux + 186)
@@ -1123,9 +1268,6 @@ struct kernel_io_event {
 #ifndef __NR_pwrite64
 #define __NR_pwrite64           (__NR_Linux + 201)
 #endif
-#ifndef __NR_capset
-#define __NR_capset             (__NR_Linux + 205)
-#endif
 #ifndef __NR_stat64
 #define __NR_stat64             (__NR_Linux + 213)
 #endif
@@ -1193,9 +1335,6 @@ struct kernel_io_event {
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           (__NR_Linux + 294)
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            (__NR_Linux + 303)
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         (__NR_Linux + 308)
 #endif
@@ -1208,9 +1347,6 @@ struct kernel_io_event {
 #ifndef __NR_ioprio_get
 #define __NR_ioprio_get         (__NR_Linux + 315)
 #endif
-#ifndef __NR_setns
-#define __NR_setns              (__NR_Linux + 344)
-#endif
 /* End of MIPS (old 32bit API) definitions */
 #elif  _MIPS_SIM == _MIPS_SIM_ABI64
 #ifndef __NR_pread64
@@ -1225,12 +1361,6 @@ struct kernel_io_event {
 #define __NR_setresgid          (__NR_Linux + 117)
 #define __NR_getresgid          (__NR_Linux + 118)
 #endif
-#ifndef __NR_capset
-#define __NR_capset             (__NR_Linux + 124)
-#endif
-#ifndef __NR_mount
-#define __NR_mount              (__NR_Linux + 160)
-#endif
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_Linux + 178)
 #endif
@@ -1283,9 +1413,6 @@ struct kernel_io_event {
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           (__NR_Linux + 253)
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            (__NR_Linux + 262)
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         (__NR_Linux + 267)
 #endif
@@ -1298,23 +1425,14 @@ struct kernel_io_event {
 #ifndef __NR_ioprio_get
 #define __NR_ioprio_get         (__NR_Linux + 274)
 #endif
-#ifndef __NR_setns
-#define __NR_setns              (__NR_Linux + 303)
-#endif
 /* End of MIPS (64bit API) definitions */
 #else
-#ifndef __NR_mount
-#define __NR_mount              (__NR_Linux + 160)
-#endif
 #ifndef __NR_setresuid
 #define __NR_setresuid          (__NR_Linux + 115)
 #define __NR_getresuid          (__NR_Linux + 116)
 #define __NR_setresgid          (__NR_Linux + 117)
 #define __NR_getresgid          (__NR_Linux + 118)
 #endif
-#ifndef __NR_capset
-#define __NR_capset             (__NR_Linux + 124)
-#endif
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_Linux + 178)
 #endif
@@ -1373,9 +1491,6 @@ struct kernel_io_event {
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           (__NR_Linux + 257)
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            (__NR_Linux + 266)
-#endif
 #ifndef __NR_move_pages
 #define __NR_move_pages         (__NR_Linux + 271)
 #endif
@@ -1388,16 +1503,10 @@ struct kernel_io_event {
 #ifndef __NR_ioprio_get
 #define __NR_ioprio_get         (__NR_Linux + 278)
 #endif
-#ifndef __NR_setns
-#define __NR_setns              (__NR_Linux + 308)
-#endif
 /* End of MIPS (new 32bit API) definitions                                   */
 #endif
 /* End of MIPS definitions                                                   */
 #elif defined(__PPC__)
-#ifndef __NR_mount
-#define __NR_mount              21
-#endif
 #ifndef __NR_setfsuid
 #define __NR_setfsuid           138
 #define __NR_setfsgid           139
@@ -1421,23 +1530,18 @@ struct kernel_io_event {
 #ifndef __NR_pwrite64
 #define __NR_pwrite64           180
 #endif
-#ifndef __NR_capset
-#define __NR_capset             184
-#endif
 #ifndef __NR_ugetrlimit
 #define __NR_ugetrlimit         190
 #endif
 #ifndef __NR_readahead
 #define __NR_readahead          191
 #endif
-#ifndef __PPC64__
 #ifndef __NR_stat64
 #define __NR_stat64             195
 #endif
 #ifndef __NR_fstat64
 #define __NR_fstat64            197
 #endif
-#endif /* !defined(__PPC64__) */
 #ifndef __NR_getdents64
 #define __NR_getdents64         202
 #endif
@@ -1487,28 +1591,21 @@ struct kernel_io_event {
 #ifndef __NR_fstatfs64
 #define __NR_fstatfs64          253
 #endif
-#ifndef __PPC64__
 #ifndef __NR_fadvise64_64
 #define __NR_fadvise64_64       254
 #endif
-#endif /* !defined(__PPC64__) */
 #ifndef __NR_ioprio_set
 #define __NR_ioprio_set         273
 #endif
 #ifndef __NR_ioprio_get
 #define __NR_ioprio_get         274
 #endif
-#ifndef __NR_unshare
-#define __NR_unshare            282
-#endif
 #ifndef __NR_openat
 #define __NR_openat             286
 #endif
-#ifndef __PPC64__
 #ifndef __NR_fstatat64
 #define __NR_fstatat64          291
 #endif
-#endif /* !defined(__PPC64__) */
 #ifndef __NR_unlinkat
 #define __NR_unlinkat           292
 #endif
@@ -1518,75 +1615,256 @@ struct kernel_io_event {
 #ifndef __NR_getcpu
 #define __NR_getcpu             302
 #endif
-#ifndef __NR_setns
-#define __NR_setns              350
-#endif
 /* End of powerpc defininitions                                              */
+#elif defined(__s390__)
+#ifndef __NR_quotactl
+#define __NR_quotactl           131
 #endif
-
-
-/* After forking, we must make sure to only call system calls.               */
-#if __BOUNDED_POINTERS__
-  #error "Need to port invocations of syscalls for bounded ptrs"
-#else
-  /* The core dumper and the thread lister get executed after threads
-   * have been suspended. As a consequence, we cannot call any functions
-   * that acquire locks. Unfortunately, libc wraps most system calls
-   * (e.g. in order to implement pthread_atfork, and to make calls
-   * cancellable), which means we cannot call these functions. Instead,
-   * we have to call syscall() directly.
-   */
-  #undef LSS_ERRNO
-  #ifdef SYS_ERRNO
-    /* Allow the including file to override the location of errno. This can
-     * be useful when using clone() with the CLONE_VM option.
-     */
-    #define LSS_ERRNO SYS_ERRNO
-  #else
-    #define LSS_ERRNO errno
-  #endif
-
-  #undef LSS_INLINE
-  #ifdef SYS_INLINE
-    #define LSS_INLINE SYS_INLINE
-  #else
-    #define LSS_INLINE static inline
-  #endif
-
-  /* Allow the including file to override the prefix used for all new
-   * system calls. By default, it will be set to "sys_".
-   */
-  #undef LSS_NAME
-  #ifndef SYS_PREFIX
-    #define LSS_NAME(name) sys_##name
-  #elif SYS_PREFIX < 0
-    #define LSS_NAME(name) name
-  #elif SYS_PREFIX == 0
-    #define LSS_NAME(name) sys0_##name
-  #elif SYS_PREFIX == 1
-    #define LSS_NAME(name) sys1_##name
-  #elif SYS_PREFIX == 2
-    #define LSS_NAME(name) sys2_##name
-  #elif SYS_PREFIX == 3
-    #define LSS_NAME(name) sys3_##name
-  #elif SYS_PREFIX == 4
-    #define LSS_NAME(name) sys4_##name
-  #elif SYS_PREFIX == 5
-    #define LSS_NAME(name) sys5_##name
-  #elif SYS_PREFIX == 6
-    #define LSS_NAME(name) sys6_##name
-  #elif SYS_PREFIX == 7
-    #define LSS_NAME(name) sys7_##name
-  #elif SYS_PREFIX == 8
-    #define LSS_NAME(name) sys8_##name
-  #elif SYS_PREFIX == 9
-    #define LSS_NAME(name) sys9_##name
-  #endif
-
-  #undef  LSS_RETURN
-  #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__))
-  /* Failing system calls return a negative result in the range of
-   * -1..-4095. These are "errno" values with the sign inverted.
+#ifndef __NR_rt_sigreturn
+#define __NR_rt_sigreturn       173
+#endif
+#ifndef __NR_rt_sigaction
+#define __NR_rt_sigaction       174
+#endif
+#ifndef __NR_rt_sigprocmask
+#define __NR_rt_sigprocmask     175
+#endif
+#ifndef __NR_rt_sigpending
+#define __NR_rt_sigpending      176
+#endif
+#ifndef __NR_rt_sigsuspend
+#define __NR_rt_sigsuspend      179
+#endif
+#ifndef __NR_pread64
+#define __NR_pread64            180
+#endif
+#ifndef __NR_pwrite64
+#define __NR_pwrite64           181
+#endif
+#ifndef __NR_getdents64
+#define __NR_getdents64         220
+#endif
+#ifndef __NR_readahead
+#define __NR_readahead          222
+#endif
+#ifndef __NR_setxattr
+#define __NR_setxattr           224
+#endif
+#ifndef __NR_lsetxattr
+#define __NR_lsetxattr          225
+#endif
+#ifndef __NR_getxattr
+#define __NR_getxattr           227
+#endif
+#ifndef __NR_lgetxattr
+#define __NR_lgetxattr          228
+#endif
+#ifndef __NR_listxattr
+#define __NR_listxattr          230
+#endif
+#ifndef __NR_llistxattr
+#define __NR_llistxattr         231
+#endif
+#ifndef __NR_gettid
+#define __NR_gettid             236
+#endif
+#ifndef __NR_tkill
+#define __NR_tkill              237
+#endif
+#ifndef __NR_futex
+#define __NR_futex              238
+#endif
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity  239
+#endif
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity  240
+#endif
+#ifndef __NR_set_tid_address
+#define __NR_set_tid_address    252
+#endif
+#ifndef __NR_clock_gettime
+#define __NR_clock_gettime      260
+#endif
+#ifndef __NR_clock_getres
+#define __NR_clock_getres       261
+#endif
+#ifndef __NR_statfs64
+#define __NR_statfs64           265
+#endif
+#ifndef __NR_fstatfs64
+#define __NR_fstatfs64          266
+#endif
+#ifndef __NR_ioprio_set
+#define __NR_ioprio_set         282
+#endif
+#ifndef __NR_ioprio_get
+#define __NR_ioprio_get         283
+#endif
+#ifndef __NR_openat
+#define __NR_openat             288
+#endif
+#ifndef __NR_unlinkat
+#define __NR_unlinkat           294
+#endif
+#ifndef __NR_move_pages
+#define __NR_move_pages         310
+#endif
+#ifndef __NR_getcpu
+#define __NR_getcpu             311
+#endif
+#ifndef __NR_fallocate
+#define __NR_fallocate          314
+#endif
+/* Some syscalls are named/numbered differently between s390 and s390x. */
+#ifdef __s390x__
+# ifndef __NR_getrlimit
+# define __NR_getrlimit          191
+# endif
+# ifndef __NR_setresuid
+# define __NR_setresuid          208
+# endif
+# ifndef __NR_getresuid
+# define __NR_getresuid          209
+# endif
+# ifndef __NR_setresgid
+# define __NR_setresgid          210
+# endif
+# ifndef __NR_getresgid
+# define __NR_getresgid          211
+# endif
+# ifndef __NR_setfsuid
+# define __NR_setfsuid           215
+# endif
+# ifndef __NR_setfsgid
+# define __NR_setfsgid           216
+# endif
+# ifndef __NR_fadvise64
+# define __NR_fadvise64          253
+# endif
+# ifndef __NR_newfstatat
+# define __NR_newfstatat         293
+# endif
+#else /* __s390x__ */
+# ifndef __NR_getrlimit
+# define __NR_getrlimit          76
+# endif
+# ifndef __NR_setfsuid
+# define __NR_setfsuid           138
+# endif
+# ifndef __NR_setfsgid
+# define __NR_setfsgid           139
+# endif
+# ifndef __NR_setresuid
+# define __NR_setresuid          164
+# endif
+# ifndef __NR_getresuid
+# define __NR_getresuid          165
+# endif
+# ifndef __NR_setresgid
+# define __NR_setresgid          170
+# endif
+# ifndef __NR_getresgid
+# define __NR_getresgid          171
+# endif
+# ifndef __NR_ugetrlimit
+# define __NR_ugetrlimit         191
+# endif
+# ifndef __NR_mmap2
+# define __NR_mmap2              192
+# endif
+# ifndef __NR_setresuid32
+# define __NR_setresuid32        208
+# endif
+# ifndef __NR_getresuid32
+# define __NR_getresuid32        209
+# endif
+# ifndef __NR_setresgid32
+# define __NR_setresgid32        210
+# endif
+# ifndef __NR_getresgid32
+# define __NR_getresgid32        211
+# endif
+# ifndef __NR_setfsuid32
+# define __NR_setfsuid32         215
+# endif
+# ifndef __NR_setfsgid32
+# define __NR_setfsgid32         216
+# endif
+# ifndef __NR_fadvise64_64
+# define __NR_fadvise64_64       264
+# endif
+# ifndef __NR_fstatat64
+# define __NR_fstatat64          293
+# endif
+#endif /* __s390__ */
+/* End of s390/s390x definitions                                             */
+#endif
+
+
+/* After forking, we must make sure to only call system calls.               */
+#if defined(__BOUNDED_POINTERS__)
+  #error "Need to port invocations of syscalls for bounded ptrs"
+#else
+  /* The core dumper and the thread lister get executed after threads
+   * have been suspended. As a consequence, we cannot call any functions
+   * that acquire locks. Unfortunately, libc wraps most system calls
+   * (e.g. in order to implement pthread_atfork, and to make calls
+   * cancellable), which means we cannot call these functions. Instead,
+   * we have to call syscall() directly.
+   */
+  #undef LSS_ERRNO
+  #ifdef SYS_ERRNO
+    /* Allow the including file to override the location of errno. This can
+     * be useful when using clone() with the CLONE_VM option.
+     */
+    #define LSS_ERRNO SYS_ERRNO
+  #else
+    #define LSS_ERRNO errno
+  #endif
+
+  #undef LSS_INLINE
+  #ifdef SYS_INLINE
+    #define LSS_INLINE SYS_INLINE
+  #else
+    #define LSS_INLINE static inline
+  #endif
+
+  /* Allow the including file to override the prefix used for all new
+   * system calls. By default, it will be set to "sys_".
+   */
+  #undef LSS_NAME
+  #ifndef SYS_PREFIX
+    #define LSS_NAME(name) sys_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX < 0
+    #define LSS_NAME(name) name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 0
+    #define LSS_NAME(name) sys0_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 1
+    #define LSS_NAME(name) sys1_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 2
+    #define LSS_NAME(name) sys2_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 3
+    #define LSS_NAME(name) sys3_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 4
+    #define LSS_NAME(name) sys4_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 5
+    #define LSS_NAME(name) sys5_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 6
+    #define LSS_NAME(name) sys6_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 7
+    #define LSS_NAME(name) sys7_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 8
+    #define LSS_NAME(name) sys8_##name
+  #elif defined(SYS_PREFIX) && SYS_PREFIX == 9
+    #define LSS_NAME(name) sys9_##name
+  #endif
+
+  #undef  LSS_RETURN
+  #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) \
+       || defined(__ARM_EABI__) || defined(__aarch64__) || defined(__s390__))
+  /* Failing system calls return a negative result in the range of
+   * -1..-4095. These are "errno" values with the sign inverted.
    */
   #define LSS_RETURN(type, res)                                               \
     do {                                                                      \
@@ -1603,7 +1881,8 @@ struct kernel_io_event {
   #define LSS_RETURN(type, res, err)                                          \
     do {                                                                      \
       if (err) {                                                              \
-        LSS_ERRNO = (res);                                                    \
+        unsigned long __errnovalue = (res);                                   \
+        LSS_ERRNO = __errnovalue;                                             \
         res = -1;                                                             \
       }                                                                       \
       return (type) (res);                                                    \
@@ -1622,15 +1901,6 @@ struct kernel_io_event {
    } while (0)
   #endif
   #if defined(__i386__)
-    #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
-      /* This only works for GCC-4.4 and above -- the first version to use
-         .cfi directives for dwarf unwind info.  */
-      #define CFI_ADJUST_CFA_OFFSET(adjust)                                   \
-                  ".cfi_adjust_cfa_offset " #adjust "\n"
-    #else
-      #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
-    #endif
-
     /* In PIC mode (e.g. when building shared libraries), gcc for i386
      * reserves ebx. Unfortunately, most distribution ship with implementations
      * of _syscallX() which clobber ebx.
@@ -1639,15 +1909,58 @@ struct kernel_io_event {
      * at optimizing across __asm__ calls.
      * So, we just have to redefine all of the _syscallX() macros.
      */
+    #undef LSS_ENTRYPOINT
+    #ifdef SYS_SYSCALL_ENTRYPOINT
+    static inline void (**LSS_NAME(get_syscall_entrypoint)(void))(void) {
+      void (**entrypoint)(void);
+      asm volatile(".bss\n"
+                   ".align 8\n"
+                   ".globl " SYS_SYSCALL_ENTRYPOINT "\n"
+                   ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n"
+                   ".previous\n"
+                   /* This logically does 'lea "SYS_SYSCALL_ENTRYPOINT", %0' */
+                   "call 0f\n"
+                 "0:pop  %0\n"
+                   "add  $_GLOBAL_OFFSET_TABLE_+[.-0b], %0\n"
+                   "mov  " SYS_SYSCALL_ENTRYPOINT "@GOT(%0), %0\n"
+                   : "=r"(entrypoint));
+      return entrypoint;
+    }
+
+    #define LSS_ENTRYPOINT ".bss\n"                                           \
+                           ".align 8\n"                                       \
+                           ".globl " SYS_SYSCALL_ENTRYPOINT "\n"              \
+                           ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n"         \
+                           ".previous\n"                                      \
+                           /* Check the SYS_SYSCALL_ENTRYPOINT vector      */ \
+                           "push %%eax\n"                                     \
+                           "call 10000f\n"                                    \
+                     "10000:pop  %%eax\n"                                     \
+                           "add  $_GLOBAL_OFFSET_TABLE_+[.-10000b], %%eax\n"  \
+                           "mov  " SYS_SYSCALL_ENTRYPOINT                     \
+                                 "@GOT(%%eax), %%eax\n"                       \
+                           "mov  0(%%eax), %%eax\n"                           \
+                           "test %%eax, %%eax\n"                              \
+                           "jz   10002f\n"                                    \
+                           "push %%eax\n"                                     \
+                           "call 10001f\n"                                    \
+                     "10001:pop  %%eax\n"                                     \
+                           "add  $(10003f-10001b), %%eax\n"                   \
+                           "xchg 4(%%esp), %%eax\n"                           \
+                           "ret\n"                                            \
+                     "10002:pop  %%eax\n"                                     \
+                           "int $0x80\n"                                      \
+                     "10003:\n"
+    #else
+    #define LSS_ENTRYPOINT "int $0x80\n"
+    #endif
     #undef  LSS_BODY
     #define LSS_BODY(type,args...)                                            \
       long __res;                                                             \
       __asm__ __volatile__("push %%ebx\n"                                     \
-                           CFI_ADJUST_CFA_OFFSET(4)                           \
                            "movl %2,%%ebx\n"                                  \
-                           "int $0x80\n"                                      \
-                           "pop %%ebx\n"                                      \
-                           CFI_ADJUST_CFA_OFFSET(-4)                          \
+                           LSS_ENTRYPOINT                                     \
+                           "pop %%ebx"                                        \
                            args                                               \
                            : "esp", "memory");                                \
       LSS_RETURN(type,__res)
@@ -1655,7 +1968,7 @@ struct kernel_io_event {
     #define _syscall0(type,name)                                              \
       type LSS_NAME(name)(void) {                                             \
         long __res;                                                           \
-        __asm__ volatile("int $0x80"                                          \
+        __asm__ volatile(LSS_ENTRYPOINT                                       \
                          : "=a" (__res)                                       \
                          : "0" (__NR_##name)                                  \
                          : "memory");                                         \
@@ -1700,7 +2013,7 @@ struct kernel_io_event {
         __asm__ __volatile__("push %%ebx\n"                                   \
                              "movl %2,%%ebx\n"                                \
                              "movl %1,%%eax\n"                                \
-                             "int  $0x80\n"                                   \
+                             LSS_ENTRYPOINT                                   \
                              "pop  %%ebx"                                     \
                              : "=a" (__res)                                   \
                              : "i" (__NR_##name), "ri" ((long)(arg1)),        \
@@ -1721,7 +2034,7 @@ struct kernel_io_event {
                              "movl 4(%2),%%ebp\n"                             \
                              "movl 0(%2), %%ebx\n"                            \
                              "movl %1,%%eax\n"                                \
-                             "int  $0x80\n"                                   \
+                             LSS_ENTRYPOINT                                   \
                              "pop  %%ebx\n"                                   \
                              "pop  %%ebp"                                     \
                              : "=a" (__res)                                   \
@@ -1777,7 +2090,7 @@ struct kernel_io_event {
                            "pushl  %%ebx\n"
                            "movl   %%eax,%%ebx\n"
                            "movl   %2,%%eax\n"
-                           "int    $0x80\n"
+                           LSS_ENTRYPOINT
 
                            /* In the parent: restore %ebx
                             * In the child:  move "fn" into %ebx
@@ -1805,7 +2118,7 @@ struct kernel_io_event {
                             */
                            "movl   %%eax,%%ebx\n"
                            "movl   $1,%%eax\n"
-                           "int    $0x80\n"
+                           LSS_ENTRYPOINT
 
                            /* Return to parent.
                             */
@@ -1818,32 +2131,6 @@ struct kernel_io_event {
       LSS_RETURN(int, __res);
     }
 
-    #define __NR__fadvise64_64 __NR_fadvise64_64
-    LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
-                         unsigned, offset_lo, unsigned, offset_hi,
-                         unsigned, len_lo, unsigned, len_hi,
-                         int, advice)
-
-    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
-                                       loff_t len, int advice) {
-      return LSS_NAME(_fadvise64_64)(fd,
-                                     (unsigned)offset, (unsigned)(offset >>32),
-                                     (unsigned)len, (unsigned)(len >> 32),
-                                     advice);
-    }
-
-    #define __NR__fallocate __NR_fallocate
-    LSS_INLINE _syscall6(int, _fallocate, int, fd,
-                         int, mode,
-                         unsigned, offset_lo, unsigned, offset_hi,
-                         unsigned, len_lo, unsigned, len_hi)
-
-    LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
-                                       loff_t offset, loff_t len) {
-      union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
-      return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
-    }
-
     LSS_INLINE _syscall1(int, set_thread_area, void *, u)
     LSS_INLINE _syscall1(int, get_thread_area, void *, u)
 
@@ -1858,7 +2145,7 @@ struct kernel_io_event {
       __asm__ __volatile__("call   2f\n"
                          "0:.align 16\n"
                          "1:movl   %1,%%eax\n"
-                           "int    $0x80\n"
+                           LSS_ENTRYPOINT
                          "2:popl   %0\n"
                            "addl   $(1b-0b),%0\n"
                            : "=a" (res)
@@ -1877,7 +2164,7 @@ struct kernel_io_event {
                          "0:.align 16\n"
                          "1:pop    %%eax\n"
                            "movl   %1,%%eax\n"
-                           "int    $0x80\n"
+                           LSS_ENTRYPOINT
                          "2:popl   %0\n"
                            "addl   $(1b-0b),%0\n"
                            : "=a" (res)
@@ -1891,74 +2178,171 @@ struct kernel_io_event {
      * location (e.g. when using the clone() system call with the CLONE_VM
      * option).
      */
+    #undef LSS_ENTRYPOINT
+    #ifdef SYS_SYSCALL_ENTRYPOINT
+    static inline void (**LSS_NAME(get_syscall_entrypoint)(void))(void) {
+      void (**entrypoint)(void);
+      asm volatile(".bss\n"
+                   ".align 8\n"
+                   ".globl " SYS_SYSCALL_ENTRYPOINT "\n"
+                   ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n"
+                   ".previous\n"
+                   "mov " SYS_SYSCALL_ENTRYPOINT "@GOTPCREL(%%rip), %0\n"
+                   : "=r"(entrypoint));
+      return entrypoint;
+    }
+
+    #define LSS_ENTRYPOINT                                                    \
+              ".bss\n"                                                        \
+              ".align 8\n"                                                    \
+              ".globl " SYS_SYSCALL_ENTRYPOINT "\n"                           \
+              ".common " SYS_SYSCALL_ENTRYPOINT ",8,8\n"                      \
+              ".previous\n"                                                   \
+              "mov " SYS_SYSCALL_ENTRYPOINT "@GOTPCREL(%%rip), %%rcx\n"       \
+              "mov  0(%%rcx), %%rcx\n"                                        \
+              "test %%rcx, %%rcx\n"                                           \
+              "jz   10001f\n"                                                 \
+              "call *%%rcx\n"                                                 \
+              "jmp  10002f\n"                                                 \
+        "10001:syscall\n"                                                     \
+        "10002:\n"
+
+    #else
+    #define LSS_ENTRYPOINT "syscall\n"
+    #endif
+
+    /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
+     * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
+     * sign extension.  We can't cast pointers directly because those are
+     * 32 bits, and gcc will dump ugly warnings about casting from a pointer
+     * to an integer of a different size.
+     */
+    #undef  LSS_SYSCALL_ARG
+    #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
+    #undef  _LSS_RETURN
+    #define _LSS_RETURN(type, res, cast)                                      \
+      do {                                                                    \
+        if ((uint64_t)(res) >= (uint64_t)(-4095)) {                           \
+          LSS_ERRNO = -(res);                                                 \
+          res = -1;                                                           \
+        }                                                                     \
+        return (type)(cast)(res);                                             \
+      } while (0)
+    #undef  LSS_RETURN
+    #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
+
+    #undef  _LSS_BODY
+    #define _LSS_BODY(nr, type, name, cast, ...)                              \
+          long long __res;                                                    \
+          __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT                \
+            : "=a" (__res)                                                    \
+            : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__)                 \
+            : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory");                   \
+          _LSS_RETURN(type, __res, cast)
     #undef  LSS_BODY
-    #define LSS_BODY(type,name, ...)                                          \
-          long __res;                                                         \
-          __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name),  \
-            ##__VA_ARGS__ : "r11", "rcx", "memory");                          \
-          LSS_RETURN(type, __res)
+    #define LSS_BODY(nr, type, name, args...) \
+      _LSS_BODY(nr, type, name, uintptr_t, ## args)
+
+    #undef  LSS_BODY_ASM0
+    #undef  LSS_BODY_ASM1
+    #undef  LSS_BODY_ASM2
+    #undef  LSS_BODY_ASM3
+    #undef  LSS_BODY_ASM4
+    #undef  LSS_BODY_ASM5
+    #undef  LSS_BODY_ASM6
+    #define LSS_BODY_ASM0
+    #define LSS_BODY_ASM1 LSS_BODY_ASM0
+    #define LSS_BODY_ASM2 LSS_BODY_ASM1
+    #define LSS_BODY_ASM3 LSS_BODY_ASM2
+    #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
+    #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
+    #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
+
+    #undef  LSS_BODY_CLOBBER0
+    #undef  LSS_BODY_CLOBBER1
+    #undef  LSS_BODY_CLOBBER2
+    #undef  LSS_BODY_CLOBBER3
+    #undef  LSS_BODY_CLOBBER4
+    #undef  LSS_BODY_CLOBBER5
+    #undef  LSS_BODY_CLOBBER6
+    #define LSS_BODY_CLOBBER0
+    #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
+    #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
+    #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
+    #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
+    #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
+    #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
+
+    #undef  LSS_BODY_ARG0
+    #undef  LSS_BODY_ARG1
+    #undef  LSS_BODY_ARG2
+    #undef  LSS_BODY_ARG3
+    #undef  LSS_BODY_ARG4
+    #undef  LSS_BODY_ARG5
+    #undef  LSS_BODY_ARG6
+    #define LSS_BODY_ARG0()
+    #define LSS_BODY_ARG1(arg1) \
+      LSS_BODY_ARG0(), "D" (arg1)
+    #define LSS_BODY_ARG2(arg1, arg2) \
+      LSS_BODY_ARG1(arg1), "S" (arg2)
+    #define LSS_BODY_ARG3(arg1, arg2, arg3) \
+      LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
+    #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
+      LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
+    #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
+      LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
+    #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
+      LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
+
     #undef _syscall0
     #define _syscall0(type,name)                                              \
-      type LSS_NAME(name)() {                                                 \
-        LSS_BODY(type, name);                                                 \
+      type LSS_NAME(name)(void) {                                             \
+        LSS_BODY(0, type, name);                                              \
       }
     #undef _syscall1
     #define _syscall1(type,name,type1,arg1)                                   \
       type LSS_NAME(name)(type1 arg1) {                                       \
-        LSS_BODY(type, name, "D" ((long)(arg1)));                             \
+        LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1));                       \
       }
     #undef _syscall2
     #define _syscall2(type,name,type1,arg1,type2,arg2)                        \
       type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)));         \
+        LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
       }
     #undef _syscall3
     #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)             \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)),          \
-                             "d" ((long)(arg3)));                             \
+        LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3));                       \
       }
     #undef _syscall4
     #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-          long __res;                                                         \
-          __asm__ __volatile__("movq %5,%%r10; syscall" :                     \
-            "=a" (__res) : "0" (__NR_##name),                                 \
-            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory");              \
-          LSS_RETURN(type, __res);                                            \
+        LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
       }
     #undef _syscall5
     #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
                       type5,arg5)                                             \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5) {                                       \
-          long __res;                                                         \
-          __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" :       \
-            "=a" (__res) : "0" (__NR_##name),                                 \
-            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "r" ((long)(arg4)), "r" ((long)(arg5)) :                          \
-            "r8", "r10", "r11", "rcx", "memory");                             \
-          LSS_RETURN(type, __res);                                            \
+        LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+                                LSS_SYSCALL_ARG(arg5));                       \
       }
     #undef _syscall6
     #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
                       type5,arg5,type6,arg6)                                  \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5, type6 arg6) {                           \
-          long __res;                                                         \
-          __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;"   \
-                               "syscall" :                                    \
-            "=a" (__res) : "0" (__NR_##name),                                 \
-            "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) :      \
-            "r8", "r9", "r10", "r11", "rcx", "memory");                       \
-          LSS_RETURN(type, __res);                                            \
+        LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+                                LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+                                LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
       }
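To make the macro machinery above concrete, this is roughly what a
one-argument declaration such as LSS_INLINE _syscall1(int, close, int, f)
expands to on x86-64 once LSS_ENTRYPOINT is the plain "syscall" instruction.
Illustrative only, not part of the patch; the error handling is the
_LSS_RETURN logic inlined, assuming LSS_ERRNO keeps its default of errno:

    #include <errno.h>
    #include <stdint.h>
    #include <sys/syscall.h>   /* __NR_close */

    static inline int expanded_sys_close(int f) {
      long long __res;
      __asm__ __volatile__("syscall"
          : "=a" (__res)
          : "0" ((long long)__NR_close), "D" ((uint64_t)(uintptr_t)(f))
          : "r11", "rcx", "memory");
      if ((uint64_t)__res >= (uint64_t)(-4095)) {
        errno = (int)-__res;
        __res = -1;
      }
      return (int)(uintptr_t)__res;
    }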
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
                                    int flags, void *arg, int *parent_tidptr,
                                    void *newtls, int *child_tidptr) {
-      long __res;
+      long long __res;
       {
         __asm__ __volatile__(/* if (fn == NULL)
                               *   return -EINVAL;
@@ -1972,10 +2356,8 @@ struct kernel_io_event {
                              "testq  %5,%5\n"
                              "jz     1f\n"
 
-                             /* Set up alignment of the child stack:
-                              * child_stack = (child_stack & ~0xF) - 16;
+                             /* childstack -= 2*sizeof(void *);
                               */
-                             "andq   $-16,%5\n"
                              "subq   $16,%5\n"
 
                              /* Push "arg" and "fn" onto the stack that will be
@@ -1994,7 +2376,7 @@ struct kernel_io_event {
                              "movq   %2,%%rax\n"
                              "movq   %9,%%r8\n"
                              "movq   %10,%%r10\n"
-                             "syscall\n"
+                             LSS_ENTRYPOINT
 
                              /* if (%rax != 0)
                               *   return;
@@ -2016,22 +2398,25 @@ struct kernel_io_event {
                               */
                              "movq   %%rax,%%rdi\n"
                              "movq   %3,%%rax\n"
-                             "syscall\n"
+                             LSS_ENTRYPOINT
 
                              /* Return to parent.
                               */
                            "1:\n"
                              : "=a" (__res)
                              : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
-                               "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
-                               "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
-                             : "rsp", "memory", "r8", "r10", "r11", "rcx");
+                               "r"(LSS_SYSCALL_ARG(fn)),
+                               "S"(LSS_SYSCALL_ARG(child_stack)),
+                               "D"(LSS_SYSCALL_ARG(flags)),
+                               "r"(LSS_SYSCALL_ARG(arg)),
+                               "d"(LSS_SYSCALL_ARG(parent_tidptr)),
+                               "r"(LSS_SYSCALL_ARG(newtls)),
+                               "r"(LSS_SYSCALL_ARG(child_tidptr))
+                             : "memory", "r8", "r10", "r11", "rcx");
       }
       LSS_RETURN(int, __res);
     }
     LSS_INLINE _syscall2(int, arch_prctl, int, c, void *, a)
-    LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
-                         int,  advice)
 
     LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
       /* On x86-64, the kernel does not know how to return from
@@ -2040,175 +2425,111 @@ struct kernel_io_event {
        * Unfortunately, we cannot just reference the glibc version of this
        * function, as glibc goes out of its way to make it inaccessible.
        */
-      void (*res)(void);
-      __asm__ __volatile__("call   2f\n"
-                         "0:.align 16\n"
+      long long res;
+      __asm__ __volatile__("jmp    2f\n"
+                           ".align 16\n"
                          "1:movq   %1,%%rax\n"
-                           "syscall\n"
-                         "2:popq   %0\n"
-                           "addq   $(1b-0b),%0\n"
-                           : "=a" (res)
+                           LSS_ENTRYPOINT
+                         "2:leaq   1b(%%rip),%0\n"
+                           : "=r" (res)
                            : "i"  (__NR_rt_sigreturn));
-      return res;
+      return (void (*)(void))(uintptr_t)res;
     }
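The pointer returned above is what gets installed as the sa_restorer
trampoline when handlers are registered through the raw rt_sigaction
syscall (the header's own sigaction wrapper performs an equivalent step
internally). A hedged sketch of that wiring; the kernel_sigaction field
names, SA_RESTORER, KERNEL_NSIG and the LSS_NAME naming are assumed to
match this header's definitions, and "on_usr1" is a hypothetical handler:

    #include <signal.h>   /* SIGUSR1 */
    #include <string.h>   /* memset */

    static void install_usr1_handler(void (*on_usr1)(int)) {
      struct kernel_sigaction sa;
      memset(&sa, 0, sizeof(sa));
      sa.sa_handler_ = on_usr1;                    /* assumed field name */
      sa.sa_flags    = SA_RESTORER;
      sa.sa_restorer = LSS_NAME(restore_rt)();
      LSS_NAME(rt_sigaction)(SIGUSR1, &sa, NULL, (KERNEL_NSIG + 7) / 8);
    }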
-  #elif defined(__arm__)
+  #elif defined(__ARM_ARCH_3__)
     /* Most definitions of _syscallX() neglect to mark "memory" as being
      * clobbered. This causes problems with compilers, that do a better job
      * at optimizing across __asm__ calls.
-     * So, we just have to redefine all fo the _syscallX() macros.
+     * So, we just have to redefine all of the _syscallX() macros.
      */
     #undef LSS_REG
     #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
-
-    /* r0..r3 are scratch registers and not preserved across function
-     * calls.  We need to first evaluate the first 4 syscall arguments
-     * and store them on stack.  They must be loaded into r0..r3 after
-     * all function calls to avoid r0..r3 being clobbered.
-     */
-    #undef LSS_SAVE_ARG
-    #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
-    #undef LSS_LOAD_ARG
-    #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
-
     #undef  LSS_BODY
-    #define LSS_BODY(type, name, args...)                                     \
+    #define LSS_BODY(type,name,args...)                                       \
           register long __res_r0 __asm__("r0");                               \
           long __res;                                                         \
-          __SYS_REG(name)                                                     \
-          __asm__ __volatile__ (__syscall_safe(name)                          \
-                                : "=r"(__res_r0)                              \
-                                : __SYS_REG_LIST(args)                        \
-                                : "lr", "memory");                            \
+          __asm__ __volatile__ (__syscall(name)                               \
+                                : "=r"(__res_r0) : args : "lr", "memory");    \
           __res = __res_r0;                                                   \
           LSS_RETURN(type, __res)
     #undef _syscall0
     #define _syscall0(type, name)                                             \
-      type LSS_NAME(name)() {                                                 \
+      type LSS_NAME(name)(void) {                                             \
         LSS_BODY(type, name);                                                 \
       }
     #undef _syscall1
     #define _syscall1(type, name, type1, arg1)                                \
       type LSS_NAME(name)(type1 arg1) {                                       \
-        /* There is no need for using a volatile temp.  */                    \
-        LSS_REG(0, arg1);                                                     \
-        LSS_BODY(type, name, "r"(__r0));                                      \
+        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0));                    \
       }
     #undef _syscall2
     #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
       type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
+        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
       }
     #undef _syscall3
     #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
       }
     #undef _syscall4
-    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4)                                            \
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4);                                                     \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
       }
     #undef _syscall5
-    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4, type5, arg5)                               \
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5) {                                       \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_REG(4, arg5);                                                     \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
                              "r"(__r4));                                      \
       }
     #undef _syscall6
-    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
-                      type4, arg4, type5, arg5, type6, arg6)                  \
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5, type6 arg6) {                           \
-        LSS_SAVE_ARG(0, arg1);                                                \
-        LSS_SAVE_ARG(1, arg2);                                                \
-        LSS_SAVE_ARG(2, arg3);                                                \
-        LSS_SAVE_ARG(3, arg4);                                                \
-        LSS_REG(4, arg5);                                                     \
-        LSS_REG(5, arg6);                                                     \
-        LSS_LOAD_ARG(0);                                                      \
-        LSS_LOAD_ARG(1);                                                      \
-        LSS_LOAD_ARG(2);                                                      \
-        LSS_LOAD_ARG(3);                                                      \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
                              "r"(__r4), "r"(__r5));                           \
       }
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
                                    int flags, void *arg, int *parent_tidptr,
                                    void *newtls, int *child_tidptr) {
-      register long __res __asm__("r5");
+      long __res;
       {
-        if (fn == NULL || child_stack == NULL) {
-            __res = -EINVAL;
-            goto clone_exit;
-        }
-
-        /* stash first 4 arguments on stack first because we can only load
-         * them after all function calls.
-         */
-        int    tmp_flags = flags;
-        int  * tmp_stack = (int*) child_stack;
-        void * tmp_ptid  = parent_tidptr;
-        void * tmp_tls   = newtls;
-
+        register int   __flags __asm__("r0") = flags;
+        register void *__stack __asm__("r1") = child_stack;
+        register void *__ptid  __asm__("r2") = parent_tidptr;
+        register void *__tls   __asm__("r3") = newtls;
         register int  *__ctid  __asm__("r4") = child_tidptr;
+        __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
+                              *   return -EINVAL;
+                              */
+                             "cmp   %2,#0\n"
+                             "cmpne %3,#0\n"
+                             "moveq %0,%1\n"
+                             "beq   1f\n"
 
-        /* Push "arg" and "fn" onto the stack that will be
-         * used by the child.
-         */
-        *(--tmp_stack) = (int) arg;
-        *(--tmp_stack) = (int) fn;
-
-        /* We must load r0..r3 last after all possible function calls.  */
-        register int   __flags __asm__("r0") = tmp_flags;
-        register void *__stack __asm__("r1") = tmp_stack;
-        register void *__ptid  __asm__("r2") = tmp_ptid;
-        register void *__tls   __asm__("r3") = tmp_tls;
-
-        /* %r0 = syscall(%r0 = flags,
-         *               %r1 = child_stack,
-         *               %r2 = parent_tidptr,
-         *               %r3 = newtls,
-         *               %r4 = child_tidptr)
-         */
-        __SYS_REG(clone)
-        __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
+                             /* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "str   %5,[%3,#-4]!\n"
+                             "str   %2,[%3,#-4]!\n"
+
+                             /* %r0 = syscall(%r0 = flags,
                               *               %r1 = child_stack,
                               *               %r2 = parent_tidptr,
                               *               %r3 = newtls,
                               *               %r4 = child_tidptr)
                               */
-                             "push  {r7}\n"
-                             "mov   r7,%1\n"
                              __syscall(clone)"\n"
 
                              /* if (%r0 != 0)
@@ -2223,24 +2544,275 @@ struct kernel_io_event {
                              "mov   lr,pc\n"
                              "ldr   pc,[sp]\n"
 
-                             /* Call _exit(%r0), which never returns.  We only
-                              * need to set r7 for EABI syscall ABI but we do
-                              * this always to simplify code sharing between
-                              * old and new syscall ABIs.
+                             /* Call _exit(%r0).
+                              */
+                             __syscall(exit)"\n"
+                           "1:\n"
+                             : "=r" (__res)
+                             : "i"(-EINVAL),
+                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__tls), "r"(__ctid)
+                             : "cc", "lr", "memory");
+      }
+      LSS_RETURN(int, __res);
+    }
+  #elif defined(__ARM_EABI__)
+    /* Most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers, that do a better job
+     * at optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     */
+    #undef LSS_REG
+    #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+    #undef  LSS_BODY
+    #define LSS_BODY(type,name,args...)                                       \
+          register long __res_r0 __asm__("r0");                               \
+          long __res;                                                         \
+          __asm__ __volatile__ ("push {r7}\n"                                 \
+                                "mov r7, %1\n"                                \
+                                "swi 0x0\n"                                   \
+                                "pop {r7}\n"                                  \
+                                : "=r"(__res_r0)                              \
+                                : "i"(__NR_##name) , ## args                  \
+                                : "lr", "memory");                            \
+          __res = __res_r0;                                                   \
+          LSS_RETURN(type, __res)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)(void) {                                             \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0));                    \
+      }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
+      }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
+      }
+    #undef _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4);                                                     \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
+      }
+    #undef _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4));                                      \
+      }
+    #undef _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4), "r"(__r5));                           \
+      }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __res;
+      if (fn == NULL || child_stack == NULL) {
+        __res = -EINVAL;
+        LSS_RETURN(int, __res);
+      }
+
+      /* Push "arg" and "fn" onto the stack that will be
+       * used by the child.
+       */
+      {
+        uintptr_t* cstack = (uintptr_t*)child_stack - 2;
+        cstack[0] = (uintptr_t)fn;
+        cstack[1] = (uintptr_t)arg;
+        child_stack = cstack;
+      }
+      {
+        register int   __flags __asm__("r0") = flags;
+        register void *__stack __asm__("r1") = child_stack;
+        register void *__ptid  __asm__("r2") = parent_tidptr;
+        register void *__tls   __asm__("r3") = newtls;
+        register int  *__ctid  __asm__("r4") = child_tidptr;
+        __asm__ __volatile__(
+#ifdef __thumb2__
+            "push {r7}\n"
+#endif
+            /* %r0 = syscall(%r0 = flags,
+             *               %r1 = child_stack,
+             *               %r2 = parent_tidptr,
+             *               %r3 = newtls,
+             *               %r4 = child_tidptr)
+             */
+            "mov r7, %6\n"
+            "swi 0x0\n"
+
+            /* if (%r0 != 0)
+             *   return %r0;
+             */
+            "cmp   r0, #0\n"
+            "bne   1f\n"
+
+            /* In the child, now. Call "fn(arg)".
+             */
+            "ldr   r0,[sp, #4]\n"
+
+            "ldr   lr,[sp]\n"
+            "blx   lr\n"
+
+            /* Call _exit(%r0).
+             */
+            "mov r7, %7\n"
+            "swi 0x0\n"
+            /* Unreachable */
+            "bkpt #0\n"
+         "1:\n"
+#ifdef __thumb2__
+            "pop {r7}\n"
+#endif
+            "movs  %0,r0\n"
+            : "=r"(__res)
+            : "r"(__stack), "r"(__flags), "r"(__ptid), "r"(__tls), "r"(__ctid),
+              "i"(__NR_clone), "i"(__NR_exit)
+            : "cc", "lr", "memory"
+#ifndef __thumb2__
+            , "r7"
+#endif
+            );
+      }
+      LSS_RETURN(int, __res);
+    }
+  #elif defined(__aarch64__)
+    /* Most definitions of _syscallX() neglect to mark "memory" as being
+     * clobbered. This causes problems with compilers, that do a better job
+     * at optimizing across __asm__ calls.
+     * So, we just have to redefine all of the _syscallX() macros.
+     */
+    #undef LSS_REG
+    #define LSS_REG(r,a) register int64_t __r##r __asm__("x"#r) = (int64_t)a
+    #undef  LSS_BODY
+    #define LSS_BODY(type,name,args...)                                       \
+          register int64_t __res_x0 __asm__("x0");                            \
+          int64_t __res;                                                      \
+          __asm__ __volatile__ ("mov x8, %1\n"                                \
+                                "svc 0x0\n"                                   \
+                                : "=r"(__res_x0)                              \
+                                : "i"(__NR_##name) , ## args                  \
+                                : "x8", "memory");                            \
+          __res = __res_x0;                                                   \
+          LSS_RETURN(type, __res)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+      type LSS_NAME(name)(void) {                                             \
+        LSS_BODY(type, name);                                                 \
+      }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+      type LSS_NAME(name)(type1 arg1) {                                       \
+        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0));                    \
+      }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+      type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
+      }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
+      }
+    #undef _syscall4
+    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4);                                                     \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
+      }
+    #undef _syscall5
+    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5)                                             \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5) {                                       \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4));                                      \
+      }
+    #undef _syscall6
+    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
+                      type5,arg5,type6,arg6)                                  \
+      type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
+                          type5 arg5, type6 arg6) {                           \
+        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
+        LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
+                             "r"(__r4), "r"(__r5));                           \
+      }
+
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      int64_t __res;
+      {
+        register uint64_t __flags __asm__("x0") = flags;
+        register void *__stack __asm__("x1") = child_stack;
+        register void *__ptid  __asm__("x2") = parent_tidptr;
+        register void *__tls   __asm__("x3") = newtls;
+        register int  *__ctid  __asm__("x4") = child_tidptr;
+        __asm__ __volatile__(/* Push "arg" and "fn" onto the stack that will be
+                              * used by the child.
+                              */
+                             "stp     %1, %4, [%2, #-16]!\n"
+
+                             /* %x0 = syscall(%x0 = flags,
+                              *               %x1 = child_stack,
+                              *               %x2 = parent_tidptr,
+                              *               %x3 = newtls,
+                              *               %x4 = child_tidptr)
+                              */
+                             "mov     x8, %8\n"
+                             "svc     0x0\n"
+
+                             /* if (%r0 != 0)
+                              *   return %r0;
+                              */
+                             "mov     %0, x0\n"
+                             "cbnz    x0, 1f\n"
+
+                             /* In the child, now. Call "fn(arg)".
                               */
-                             "mov   r7,%2\n"
-                             __syscall(exit)"\n"
+                             "ldp     x1, x0, [sp], #16\n"
+                             "blr     x1\n"
 
-                             /* Pop r7 from the stack only in the parent.
+                             /* Call _exit(%r0).
                               */
-                           "1: pop {r7}\n"
+                             "mov     x8, %9\n"
+                             "svc     0x0\n"
+                           "1:\n"
                              : "=r" (__res)
-                             : "r"(__sysreg),
-                               "i"(__NR_exit), "r"(__stack), "r"(__flags),
-                               "r"(__ptid), "r"(__tls), "r"(__ctid)
-                             : "cc", "lr", "memory");
+                             : "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__tls), "r"(__ctid),
+                               "i"(__NR_clone), "i"(__NR_exit)
+                             : "cc", "x8", "memory");
       }
-      clone_exit:
       LSS_RETURN(int, __res);
     }
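Since aarch64 is the architecture this bump adds, a usage sketch of the
clone wrapper may help: the caller supplies its own downward-growing stack
plus a child function, exactly as on the other ports. Illustrative only and
not part of the patch; it assumes the default sys_ prefix and the CLONE_*
constants (compile with _GNU_SOURCE for <sched.h>):

    #include <sched.h>      /* CLONE_VM */
    #include <signal.h>     /* SIGCHLD */
    #include <sys/mman.h>   /* mmap() for the child stack */

    static int child_main(void *arg) {
      *(volatile int *)arg = 1;   /* visible to the parent via CLONE_VM */
      return 0;                   /* becomes the child's _exit() status */
    }

    static int spawn_child(volatile int *done) {
      const size_t kStackSize = 64 * 1024;
      char *stack = (char *)mmap(NULL, kStackSize, PROT_READ | PROT_WRITE,
                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (stack == MAP_FAILED)
        return -1;
      /* Stacks grow down: pass the (16-byte aligned) top of the mapping. */
      return sys_clone(child_main, stack + kStackSize, CLONE_VM | SIGCHLD,
                       (void *)done, NULL, NULL, NULL);
    }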
   #elif defined(__mips__)
@@ -2248,17 +2820,26 @@ struct kernel_io_event {
     #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) =       \
                                  (unsigned long)(a)
     #undef  LSS_BODY
+    #undef LSS_SYSCALL_CLOBBERS
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    #define LSS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10",               \
+                                 "$11", "$12", "$13", "$14", "$15",           \
+                                 "$24", "$25", "hi", "lo", "memory"
+    #else
+    #define LSS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12",             \
+                                 "$13", "$14", "$15", "$24", "$25",           \
+                                 "hi", "lo", "memory"
+    #endif
     #define LSS_BODY(type,name,r7,...)                                        \
           register unsigned long __v0 __asm__("$2") = __NR_##name;            \
           __asm__ __volatile__ ("syscall\n"                                   \
-                                : "=&r"(__v0), r7 (__r7)                      \
+                                : "=r"(__v0), r7 (__r7)                       \
                                 : "0"(__v0), ##__VA_ARGS__                    \
-                                : "$8", "$9", "$10", "$11", "$12",            \
-                                  "$13", "$14", "$15", "$24", "memory");      \
+                                : LSS_SYSCALL_CLOBBERS);                      \
           LSS_RETURN(type, __v0, __r7)
     #undef _syscall0
     #define _syscall0(type, name)                                             \
-      type LSS_NAME(name)() {                                                 \
+      type LSS_NAME(name)(void) {                                             \
         register unsigned long __r7 __asm__("$7");                            \
         LSS_BODY(type, name, "=r");                                           \
       }
@@ -2300,20 +2881,19 @@ struct kernel_io_event {
                           type5 arg5) {                                       \
         LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
         LSS_REG(7, arg4);                                                     \
-        register unsigned long __v0 __asm__("$2");                            \
+        register unsigned long __v0 __asm__("$2") = __NR_##name;              \
         __asm__ __volatile__ (".set noreorder\n"                              \
-                              "lw    $2, %6\n"                                \
                               "subu  $29, 32\n"                               \
-                              "sw    $2, 16($29)\n"                           \
-                              "li    $2, %2\n"                                \
+                              "sw    %5, 16($29)\n"                           \
                               "syscall\n"                                     \
                               "addiu $29, 32\n"                               \
                               ".set reorder\n"                                \
-                              : "=&r"(__v0), "+r" (__r7)                      \
-                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
-                                "r"(__r6), "m" ((unsigned long)arg5)          \
+                              : "+r"(__v0), "+r" (__r7)                       \
+                              : "r"(__r4), "r"(__r5),                         \
+                                "r"(__r6), "r" ((unsigned long)arg5)          \
                               : "$8", "$9", "$10", "$11", "$12",              \
-                                "$13", "$14", "$15", "$24", "memory");        \
+                                "$13", "$14", "$15", "$24", "$25",            \
+                                "memory");                                    \
         LSS_RETURN(type, __v0, __r7);                                         \
       }
     #else
@@ -2338,23 +2918,21 @@ struct kernel_io_event {
                           type5 arg5, type6 arg6) {                           \
         LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3);                 \
         LSS_REG(7, arg4);                                                     \
-        register unsigned long __v0 __asm__("$2");                            \
+        register unsigned long __v0 __asm__("$2") = __NR_##name;              \
         __asm__ __volatile__ (".set noreorder\n"                              \
-                              "lw    $2, %6\n"                                \
-                              "lw    $8, %7\n"                                \
                               "subu  $29, 32\n"                               \
-                              "sw    $2, 16($29)\n"                           \
-                              "sw    $8, 20($29)\n"                           \
-                              "li    $2, %2\n"                                \
+                              "sw    %5, 16($29)\n"                           \
+                              "sw    %6, 20($29)\n"                           \
                               "syscall\n"                                     \
                               "addiu $29, 32\n"                               \
                               ".set reorder\n"                                \
-                              : "=&r"(__v0), "+r" (__r7)                      \
-                              : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
+                              : "+r"(__v0), "+r" (__r7)                       \
+                              : "r"(__r4), "r"(__r5),                         \
                                 "r"(__r6), "r" ((unsigned long)arg5),         \
                                 "r" ((unsigned long)arg6)                     \
                               : "$8", "$9", "$10", "$11", "$12",              \
-                                "$13", "$14", "$15", "$24", "memory");        \
+                                "$13", "$14", "$15", "$24", "$25",            \
+                                "memory");                                    \
         LSS_RETURN(type, __v0, __r7);                                         \
       }
     #else
@@ -2371,7 +2949,7 @@ struct kernel_io_event {
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
                                    int flags, void *arg, int *parent_tidptr,
                                    void *newtls, int *child_tidptr) {
-      register unsigned long __v0 __asm__("$2");
+      register unsigned long __v0 __asm__("$2") = -EINVAL;
       register unsigned long __r7 __asm__("$7") = (unsigned long)newtls;
       {
         register int   __flags __asm__("$4") = flags;
@@ -2390,25 +2968,24 @@ struct kernel_io_event {
                              /* if (fn == NULL || child_stack == NULL)
                               *   return -EINVAL;
                               */
-                             "li    %0,%2\n"
+                             "beqz  %4,1f\n"
                              "beqz  %5,1f\n"
-                             "beqz  %6,1f\n"
 
                              /* Push "arg" and "fn" onto the stack that will be
                               * used by the child.
                               */
           #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32
-                             "subu  %6,32\n"
-                             "sw    %5,0(%6)\n"
-                             "sw    %8,4(%6)\n"
+                             "subu  %5,32\n"
+                             "sw    %4,0(%5)\n"
+                             "sw    %7,4(%5)\n"
           #elif _MIPS_SIM == _MIPS_SIM_NABI32
-                             "sub   %6,32\n"
-                             "sw    %5,0(%6)\n"
-                             "sw    %8,8(%6)\n"
+                             "sub   %5,32\n"
+                             "sw    %4,0(%5)\n"
+                             "sw    %7,8(%5)\n"
           #else
-                             "dsubu %6,32\n"
-                             "sd    %5,0(%6)\n"
-                             "sd    %8,8(%6)\n"
+                             "dsubu %5,32\n"
+                             "sd    %4,0(%5)\n"
+                             "sd    %7,8(%5)\n"
           #endif
 
                              /* $7 = syscall($4 = flags,
@@ -2417,7 +2994,7 @@ struct kernel_io_event {
                               *              $7 = newtls,
                               *              $8 = child_tidptr)
                               */
-                             "li    $2,%3\n"
+                             "li    $2,%2\n"
                              "syscall\n"
 
                              /* if ($7 != 0)
@@ -2443,7 +3020,7 @@ struct kernel_io_event {
                              /* Call _exit($2)
                               */
                             "move  $4,$2\n"
-                            "li    $2,%4\n"
+                            "li    $2,%3\n"
                             "syscall\n"
 
                            "1:\n"
@@ -2454,12 +3031,12 @@ struct kernel_io_event {
           #else
                              "daddu $29,16\n"
           #endif
-                             : "=&r" (__v0), "=r" (__r7)
-                             : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
-                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
-                               "r"(__ptid), "r"(__r7), "r"(__ctid)
+                             : "+r" (__v0), "+r" (__r7)
+                             : "i"(__NR_clone), "i"(__NR_exit), "r"(fn),
+                               "r"(__stack), "r"(__flags), "r"(arg),
+                               "r"(__ptid), "r"(__ctid)
                              : "$9", "$10", "$11", "$12", "$13", "$14", "$15",
-                               "$24", "memory");
+                               "$24", "$25", "memory");
       }
       LSS_RETURN(int, __v0, __r7);
     }
@@ -2572,19 +3149,8 @@ struct kernel_io_event {
                                                type5 arg5, type6 arg6) {      \
           LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6);        \
        }
-
-    #undef LSS_PPC_MINIMUM_FRAME_SIZE
-    #undef LSS_SIZE_S
-    #ifdef __PPC64__
-      #define LSS_PPC_MINIMUM_FRAME_SIZE 112
-      #define LSS_SIZE_S "d"
-    #else
-      #define LSS_PPC_MINIMUM_FRAME_SIZE 16
-      #define LSS_SIZE_S "w"
-    #endif
-
     /* clone function adapted from glibc 2.3.6 clone.S                       */
-    /* TODO(user): consider wrapping some args up in a struct, like we
+    /* TODO(csilvers): consider wrapping some args up in a struct, like we
      * do for i386's _syscall6, so we can compile successfully on gcc 2.95
      */
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
@@ -2603,17 +3169,17 @@ struct kernel_io_event {
             /* check for fn == NULL
              * and child_stack == NULL
              */
-            "cmp" LSS_SIZE_S "i cr0, %6, 0\n\t"
-            "cmp" LSS_SIZE_S "i cr1, %7, 0\n\t"
+            "cmpwi cr0, %6, 0\n\t"
+            "cmpwi cr1, %7, 0\n\t"
             "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t"
             "beq- cr0, 1f\n\t"
 
             /* set up stack frame for child                                  */
-            "clrr" LSS_SIZE_S "i %7, %7, 4\n\t"
+            "clrrwi %7, %7, 4\n\t"
             "li 0, 0\n\t"
-            "st" LSS_SIZE_S "u 0, %13(%7)\n\t"
+            "stwu 0, -16(%7)\n\t"
 
-            /* fn, arg, child_stack are saved across the syscall: r27-29     */
+            /* fn, arg, child_stack are saved across the syscall: r28-30     */
             "mr 28, %6\n\t"
             "mr 29, %7\n\t"
             "mr 27, %9\n\t"
@@ -2629,30 +3195,14 @@ struct kernel_io_event {
             "sc\n\t"
 
             /* Test if syscall was successful                                */
-            "cmp" LSS_SIZE_S "i cr1, 3, 0\n\t"
+            "cmpwi cr1, 3, 0\n\t"
             "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t"
             "bne- cr1, 1f\n\t"
 
-            /* Do the function call.  On PowerPC64, a function pointer points
-             * a function descriptor instead of the first instruction.  We need
-             * to load the callee's entry point and TOC from the descriptor.
-             * Since the callee may have a differet TOC, we also need to
-             * save and restore caller's TOC around the call.
-             */
-
-    #ifdef __PPC64__
-            "std 2, 40(1)\n\t"  /* Save caller's TOC. */
-            "ld 4, 0(28)\n\t"   /* Get callee's entry address. */
-            "ld 2, 8(28)\n\t"   /* Load calee's TOC. */
-            "mtctr 4\n\t"
-            "mr 3, 27\n\t"
-            "bctrl\n\t"
-            "ld 2, 40(1)\n\t"  /* Restore caller's TOC after call. */
-    #else
+            /* Do the function call                                          */
             "mtctr 28\n\t"
             "mr 3, 27\n\t"
             "bctrl\n\t"
-    #endif
 
             /* Call _exit(r3)                                                */
             "li 0, %5\n\t"
@@ -2667,59 +3217,200 @@ struct kernel_io_event {
                 "i" (__NR_clone), "i" (__NR_exit),
                 "r" (__fn), "r" (__cstack), "r" (__flags),
                 "r" (__arg), "r" (__ptidptr), "r" (__newtls),
-                "r" (__ctidptr), "i"(-LSS_PPC_MINIMUM_FRAME_SIZE)
+                "r" (__ctidptr)
               : "cr0", "cr1", "memory", "ctr",
                 "r0", "r29", "r27", "r28");
       }
       LSS_RETURN(int, __ret, __err);
     }
-  #ifdef __PPC64__
-    LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
-                         int,  advice)
-  #else
-    /* fadvise64 wrapper not yet implemented for 32-bit PowerPC.  */
+  #elif defined(__s390__)
+    #undef  LSS_REG
+    #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a
+    #undef  LSS_BODY
+    #define LSS_BODY(type, name, args...)                                     \
+        register unsigned long __nr __asm__("r1")                             \
+            = (unsigned long)(__NR_##name);                                   \
+        register long __res_r2 __asm__("r2");                                 \
+        long __res;                                                           \
+        __asm__ __volatile__                                                  \
+            ("svc 0\n\t"                                                      \
+             : "=d"(__res_r2)                                                 \
+             : "d"(__nr), ## args                                             \
+             : "memory");                                                     \
+        __res = __res_r2;                                                     \
+        LSS_RETURN(type, __res)
+    #undef _syscall0
+    #define _syscall0(type, name)                                             \
+       type LSS_NAME(name)(void) {                                            \
+          LSS_BODY(type, name);                                               \
+       }
+    #undef _syscall1
+    #define _syscall1(type, name, type1, arg1)                                \
+       type LSS_NAME(name)(type1 arg1) {                                      \
+          LSS_REG(2, arg1);                                                   \
+          LSS_BODY(type, name, "0"(__r2));                                    \
+       }
+    #undef _syscall2
+    #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2) {                          \
+          LSS_REG(2, arg1); LSS_REG(3, arg2);                                 \
+          LSS_BODY(type, name, "0"(__r2), "d"(__r3));                         \
+       }
+    #undef _syscall3
+    #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {              \
+          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
+          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4));              \
+       }
+    #undef _syscall4
+    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4)                                \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
+                           type4 arg4) {                                      \
+          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
+          LSS_REG(5, arg4);                                                   \
+          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
+                               "d"(__r5));                                    \
+       }
+    #undef _syscall5
+    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5)                   \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
+                           type4 arg4, type5 arg5) {                          \
+          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
+          LSS_REG(5, arg4); LSS_REG(6, arg5);                                 \
+          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
+                               "d"(__r5), "d"(__r6));                         \
+       }
+    #undef _syscall6
+    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                                  type4, arg4, type5, arg5, type6, arg6)      \
+       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3,                \
+                           type4 arg4, type5 arg5, type6 arg6) {              \
+          LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3);               \
+          LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6);               \
+          LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4),               \
+                               "d"(__r5), "d"(__r6), "d"(__r7));              \
+       }
+    LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+                                   int flags, void *arg, int *parent_tidptr,
+                                   void *newtls, int *child_tidptr) {
+      long __ret;
+      {
+        register int  (*__fn)(void *)    __asm__ ("r1")  = fn;
+        register void  *__cstack         __asm__ ("r2")  = child_stack;
+        register int    __flags          __asm__ ("r3")  = flags;
+        register void  *__arg            __asm__ ("r0")  = arg;
+        register int   *__ptidptr        __asm__ ("r4")  = parent_tidptr;
+        register void  *__newtls         __asm__ ("r6")  = newtls;
+        register int   *__ctidptr        __asm__ ("r5")  = child_tidptr;
+        __asm__ __volatile__ (
+    #ifndef __s390x__
+                                  /* arg already in r0 */
+          "ltr %4, %4\n\t"        /* check fn, which is already in r1 */
+          "jz 1f\n\t"             /* NULL function pointer, return -EINVAL */
+          "ltr %5, %5\n\t"        /* check child_stack, which is already in r2 */
+          "jz 1f\n\t"             /* NULL stack pointer, return -EINVAL */
+                                  /* flags already in r3 */
+                                  /* parent_tidptr already in r4 */
+                                  /* child_tidptr already in r5 */
+                                  /* newtls already in r6 */
+          "svc %2\n\t"            /* invoke clone syscall */
+          "ltr %0,%%r2\n\t"       /* load return code into __ret and test */
+          "jnz 1f\n\t"            /* return to parent if non-zero */
+                                  /* start child thread */
+          "lr %%r2, %7\n\t"       /* set first parameter to void *arg */
+          "ahi %%r15, -96\n\t"    /* make room on the stack for the save area */
+          "xc 0(4,%%r15), 0(%%r15)\n\t"
+          "basr %%r14, %4\n\t"    /* jump to fn */
+          "svc %3\n"              /* invoke exit syscall */
+          "1:\n"
+    #else
+                                  /* arg already in r0 */
+          "ltgr %4, %4\n\t"       /* check fn, which is already in r1 */
+          "jz 1f\n\t"             /* NULL function pointer, return -EINVAL */
+          "ltgr %5, %5\n\t"       /* check child_stack, which is already in r2 */
+          "jz 1f\n\t"             /* NULL stack pointer, return -EINVAL */
+                                  /* flags already in r3 */
+                                  /* parent_tidptr already in r4 */
+                                  /* child_tidptr already in r5 */
+                                  /* newtls already in r6 */
+          "svc %2\n\t"            /* invoke clone syscall */
+          "ltgr %0, %%r2\n\t"     /* load return code into __ret and test */
+          "jnz 1f\n\t"            /* return to parent if non-zero */
+                                  /* start child thread */
+          "lgr %%r2, %7\n\t"      /* set first parameter to void *arg */
+          "aghi %%r15, -160\n\t"  /* make room on the stack for the save area */
+          "xc 0(8,%%r15), 0(%%r15)\n\t"
+          "basr %%r14, %4\n\t"    /* jump to fn */
+          "svc %3\n"              /* invoke exit syscall */
+          "1:\n"
+    #endif
+          : "=r" (__ret)
+          : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit),
+            "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg),
+            "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr)
+          : "cc", "r14", "memory"
+        );
+      }
+      LSS_RETURN(int, __ret);
+    }
   #endif
-  #endif  /* defined (__PPC__) */
   #define __NR__exit   __NR_exit
   #define __NR__gettid __NR_gettid
   #define __NR__mremap __NR_mremap
-  LSS_INLINE _syscall1(int,     brk,             void *,      e)
-  LSS_INLINE _syscall2(int,     capset,
-                       struct kernel_cap_user_header*, h,
-                       struct kernel_cap_user_data*, d)
+  LSS_INLINE _syscall1(void *,  brk,             void *,      e)
   LSS_INLINE _syscall1(int,     chdir,           const char *,p)
-  LSS_INLINE _syscall1(int,     chroot,          const char *,p)
   LSS_INLINE _syscall1(int,     close,           int,         f)
   LSS_INLINE _syscall2(int,     clock_getres,    int,         c,
                        struct kernel_timespec*, t)
   LSS_INLINE _syscall2(int,     clock_gettime,   int,         c,
                        struct kernel_timespec*, t)
   LSS_INLINE _syscall1(int,     dup,             int,         f)
-  LSS_INLINE _syscall2(int,     dup2,            int,         s,
-                       int,            d)
+  #if defined(__NR_dup2)
+    // dup2 is polyfilled below when not available.
+    LSS_INLINE _syscall2(int,     dup2,            int,         s,
+                         int,            d)
+  #endif
+  #if defined(__NR_dup3)
+    LSS_INLINE _syscall3(int, dup3,  int, s, int, d, int, f)
+  #endif
   LSS_INLINE _syscall3(int,     execve,          const char*, f,
                        const char*const*,a,const char*const*, e)
   LSS_INLINE _syscall1(int,     _exit,           int,         e)
   LSS_INLINE _syscall1(int,     exit_group,      int,         e)
   LSS_INLINE _syscall3(int,     fcntl,           int,         f,
                        int,            c, long,   a)
-  LSS_INLINE _syscall0(pid_t,   fork)
+  #if defined(__NR_fork)
+    // fork is polyfilled below when not available.
+    LSS_INLINE _syscall0(pid_t,   fork)
+  #endif
   LSS_INLINE _syscall2(int,     fstat,           int,         f,
                       struct kernel_stat*,   b)
   LSS_INLINE _syscall2(int,     fstatfs,         int,         f,
                       struct kernel_statfs*, b)
-  LSS_INLINE _syscall2(int, ftruncate,           int,         f,
-                       off_t,          l)
-  LSS_INLINE _syscall4(int,     futex,           int*,        a,
-                       int,            o, int,    v,
-                      struct kernel_timespec*, t)
+  #if defined(__x86_64__)
+    /* Need to make sure off_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE int LSS_NAME(ftruncate)(int f, off_t l) {
+      LSS_BODY(2, int, ftruncate, LSS_SYSCALL_ARG(f), (uint64_t)(l));
+    }
+  #else
+    LSS_INLINE _syscall2(int, ftruncate,           int,         f,
+                         off_t,          l)
+  #endif
+  LSS_INLINE _syscall6(int,     futex,          int*,        u,
+                       int,     o,              int,         v,
+                       struct kernel_timespec*, t,
+                       int*,    u2,             int,         v2)
   LSS_INLINE _syscall3(int,     getdents,        int,         f,
-                      struct kernel_dirent*, d, int,    c)
+                       struct kernel_dirent*, d, int,    c)
   LSS_INLINE _syscall3(int,     getdents64,      int,         f,
                       struct kernel_dirent64*, d, int,    c)
   LSS_INLINE _syscall0(gid_t,   getegid)
   LSS_INLINE _syscall0(uid_t,   geteuid)
-  LSS_INLINE _syscall0(pid_t,   getpgrp)
+  #if defined(__NR_getpgrp)
+    LSS_INLINE _syscall0(pid_t,   getpgrp)
+  #endif
   LSS_INLINE _syscall0(pid_t,   getpid)
   LSS_INLINE _syscall0(pid_t,   getppid)
   LSS_INLINE _syscall2(int,     getpriority,     int,         a,
@@ -2728,15 +3419,14 @@ struct kernel_io_event {
                        gid_t *,         e,       gid_t *,     s)
   LSS_INLINE _syscall3(int,     getresuid,       uid_t *,     r,
                        uid_t *,         e,       uid_t *,     s)
-  #ifndef __ARM_EABI__
-  /* No available on ARM EABI Linux.  */
+#if !defined(__ARM_EABI__)
   LSS_INLINE _syscall2(int,     getrlimit,       int,         r,
                       struct kernel_rlimit*, l)
-  #endif
+#endif
   LSS_INLINE _syscall1(pid_t,   getsid,          pid_t,       p)
   LSS_INLINE _syscall0(pid_t,   _gettid)
-  LSS_INLINE _syscall2(int,     gettimeofday,    struct timeval *, v,
-                       struct timezone *, z)
+  LSS_INLINE _syscall2(pid_t,   gettimeofday,    struct kernel_timeval*, t,
+                       void*, tz)
   LSS_INLINE _syscall5(int,     setxattr,        const char *,p,
                        const char *,   n,        const void *,v,
                        size_t,         s,        int,         f)
@@ -2759,8 +3449,16 @@ struct kernel_io_event {
                        int,     who,             int,         ioprio)
   LSS_INLINE _syscall2(int,     kill,            pid_t,       p,
                        int,            s)
-  LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
-                       off_t,          o, int,    w)
+  #if defined(__x86_64__)
+    /* Need to make sure off_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
+      _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
+                                        LSS_SYSCALL_ARG(w));
+    }
+  #else
+    LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
+                         off_t,          o, int,    w)
+  #endif
   LSS_INLINE _syscall2(int,     munmap,          void*,       s,
                        size_t,         l)
   LSS_INLINE _syscall6(long,    move_pages,      pid_t,       p,
@@ -2771,33 +3469,28 @@ struct kernel_io_event {
   LSS_INLINE _syscall5(void*,   _mremap,         void*,       o,
                        size_t,         os,       size_t,      ns,
                        unsigned long,  f, void *, a)
-  LSS_INLINE _syscall3(int,     open,            const char*, p,
-                       int,            f, int,    m)
-  LSS_INLINE _syscall3(int,     poll,           struct kernel_pollfd*, u,
-                       unsigned int,   n, int,    t)
-  LSS_INLINE _syscall2(int,     prctl,           int,         o,
-                       long,           a)
-  LSS_INLINE _syscall5(int, mount, const char *, source, const char *, target,
-                       const char *, filesystemtype, unsigned long, mountflags,
-                       const void *, data)
-  LSS_INLINE _syscall1(int, unshare, int, flags)
-  LSS_INLINE _syscall2(int, setns, int, fd, int, nstype)
-  #if defined(__NR_preadv)
-    // Defined on x86_64 / i386 only
-  LSS_INLINE _syscall5(ssize_t, preadv, unsigned long, fd,
-                       const struct kernel_iovec*, iovec,
-                       unsigned long, vlen, unsigned long, pos_l,
-                       unsigned long, pos_h)
+  #if defined(__NR_open)
+    // open is polyfilled below when not available.
+    LSS_INLINE _syscall3(int,     open,            const char*, p,
+                         int,            f, int,    m)
+  #endif
+  #if defined(__NR_poll)
+    // poll is polyfilled below when not available.
+    LSS_INLINE _syscall3(int,     poll,           struct kernel_pollfd*, u,
+                         unsigned int,   n, int,    t)
   #endif
+  #if defined(__NR_ppoll)
+    LSS_INLINE _syscall5(int, ppoll, struct kernel_pollfd *, u,
+                         unsigned int, n, const struct kernel_timespec *, t,
+                         const struct kernel_sigset_t *, sigmask, size_t, s)
+  #endif
+  LSS_INLINE _syscall5(int,     prctl,           int,         option,
+                       unsigned long,  arg2,
+                       unsigned long,  arg3,
+                       unsigned long,  arg4,
+                       unsigned long,  arg5)
   LSS_INLINE _syscall4(long,    ptrace,          int,         r,
                        pid_t,          p, void *, a, void *, d)
-  #if defined(__NR_pwritev)
-    // Defined on x86_64 / i386 only
-  LSS_INLINE _syscall5(ssize_t, pwritev, unsigned long, fd,
-                       const struct kernel_iovec*, iovec,
-                       unsigned long, vlen, unsigned long, pos_l,
-                       unsigned long, pos_h)
-  #endif
   #if defined(__NR_quotactl)
     // Defined on x86_64 / i386 only
     LSS_INLINE _syscall4(int,  quotactl,  int,  cmd,  const char *, special,
@@ -2805,8 +3498,15 @@ struct kernel_io_event {
   #endif
   LSS_INLINE _syscall3(ssize_t, read,            int,         f,
                        void *,         b, size_t, c)
-  LSS_INLINE _syscall3(int,     readlink,        const char*, p,
-                       char*,          b, size_t, s)
+  #if defined(__NR_readlink)
+    // readlink is polyfilled below when not available.
+    LSS_INLINE _syscall3(int,     readlink,        const char*, p,
+                         char*,          b, size_t, s)
+  #endif
+  #if defined(__NR_readlinkat)
+    LSS_INLINE _syscall4(int, readlinkat, int, d, const char *, p, char *, b,
+                         size_t, s)
+  #endif
   LSS_INLINE _syscall4(int,     rt_sigaction,    int,         s,
                        const struct kernel_sigaction*, a,
                        struct kernel_sigaction*, o, size_t,   c)
@@ -2814,10 +3514,11 @@ struct kernel_io_event {
                        size_t,         c)
   LSS_INLINE _syscall4(int, rt_sigprocmask,      int,         h,
                        const struct kernel_sigset_t*,  s,
-                       struct kernel_sigset_t*,        o, size_t, c);
-  LSS_INLINE _syscall1(int, rt_sigreturn,        unsigned long, u);
+                       struct kernel_sigset_t*,        o, size_t, c)
   LSS_INLINE _syscall2(int, rt_sigsuspend,
-                       const struct kernel_sigset_t*, s,  size_t, c);
+                       const struct kernel_sigset_t*, s,  size_t, c)
+  LSS_INLINE _syscall4(int, rt_sigtimedwait, const struct kernel_sigset_t*, s,
+                       siginfo_t*, i, const struct timespec*, t, size_t, c)
   LSS_INLINE _syscall3(int,     sched_getaffinity,pid_t,      p,
                        unsigned int,   l, unsigned long *, m)
   LSS_INLINE _syscall3(int,     sched_setaffinity,pid_t,      p,
@@ -2842,25 +3543,30 @@ struct kernel_io_event {
   LSS_INLINE _syscall2(int,     sigaltstack,     const stack_t*, s,
                        const stack_t*, o)
   #if defined(__NR_sigreturn)
-  LSS_INLINE _syscall1(int,     sigreturn,       unsigned long, u);
+    LSS_INLINE _syscall1(int,     sigreturn,       unsigned long, u)
+  #endif
+  #if defined(__NR_stat)
+    // stat is polyfilled below when not available.
+    LSS_INLINE _syscall2(int,     stat,            const char*, f,
+                        struct kernel_stat*,   b)
   #endif
-  LSS_INLINE _syscall2(int,     stat,            const char*, f,
-                      struct kernel_stat*,   b)
   LSS_INLINE _syscall2(int,     statfs,          const char*, f,
                       struct kernel_statfs*, b)
   LSS_INLINE _syscall3(int,     tgkill,          pid_t,       p,
                        pid_t,          t, int,            s)
   LSS_INLINE _syscall2(int,     tkill,           pid_t,       p,
                        int,            s)
+  #if defined(__NR_unlink)
+    // unlink is polyfilled below when not available.
+    LSS_INLINE _syscall1(int,     unlink,           const char*, f)
+  #endif
   LSS_INLINE _syscall3(ssize_t, write,            int,        f,
                        const void *,   b, size_t, c)
   LSS_INLINE _syscall3(ssize_t, writev,           int,        f,
                        const struct kernel_iovec*, v, size_t, c)
-  LSS_INLINE _syscall1(int,     umask,           unsigned,    m)
-  LSS_INLINE _syscall1(int,     unlink,          const char*, f)
   #if defined(__NR_getcpu)
     LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
-                         unsigned *, node, void *, unused);
+                         unsigned *, node, void *, unused)
   #endif
   #if defined(__x86_64__) ||                                                  \
      (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
@@ -2879,8 +3585,85 @@ struct kernel_io_event {
     LSS_INLINE _syscall4(int, socketpair,         int,   d,
                          int,                     t, int,       p, int*, s)
   #endif
+  #if defined(__NR_fadvise64)
+    #if defined(__x86_64__)
+    /* Need to make sure loff_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, loff_t len,
+                                       int advice) {
+      LSS_BODY(4, int, fadvise64, LSS_SYSCALL_ARG(fd), (uint64_t)(offset),
+                                  (uint64_t)(len), LSS_SYSCALL_ARG(advice));
+    }
+    #else
+    LSS_INLINE _syscall4(int, fadvise64,
+                         int, fd, loff_t, offset, loff_t, len, int, advice)
+    #endif
+  #elif defined(__i386__)
+    #define __NR__fadvise64_64 __NR_fadvise64_64
+    LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
+                         unsigned, offset_lo, unsigned, offset_hi,
+                         unsigned, len_lo, unsigned, len_hi,
+                         int, advice)
+
+    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
+                                       loff_t len, int advice) {
+      return LSS_NAME(_fadvise64_64)(fd,
+                                     (unsigned)offset, (unsigned)(offset >>32),
+                                     (unsigned)len, (unsigned)(len >> 32),
+                                     advice);
+    }
+
+  #elif defined(__s390__) && !defined(__s390x__)
+    #define __NR__fadvise64_64 __NR_fadvise64_64
+    struct kernel_fadvise64_64_args {
+      int fd;
+      long long offset;
+      long long len;
+      int advice;
+    };
+
+    LSS_INLINE _syscall1(int, _fadvise64_64,
+                         struct kernel_fadvise64_64_args *args)
+
+    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
+                                       loff_t len, int advice) {
+      struct kernel_fadvise64_64_args args = { fd, offset, len, advice };
+      return LSS_NAME(_fadvise64_64)(&args);
+    }
+  #endif
+  #if defined(__NR_fallocate)
+    #if defined(__x86_64__)
+    /* Need to make sure loff_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE int LSS_NAME(fallocate)(int f, int mode, loff_t offset,
+                                       loff_t len) {
+      LSS_BODY(4, int, fallocate, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(mode),
+                                  (uint64_t)(offset), (uint64_t)(len));
+    }
+    #elif (defined(__i386__) || (defined(__s390__) && !defined(__s390x__)) \
+           || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) \
+           || (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) \
+           || defined(__PPC__))
+    #define __NR__fallocate __NR_fallocate
+    LSS_INLINE _syscall6(int, _fallocate, int, fd,
+                         int, mode,
+                         unsigned, offset_lo, unsigned, offset_hi,
+                         unsigned, len_lo, unsigned, len_hi)
 
-  #if defined(__x86_64__) || defined(__PPC__)
+    LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
+                                       loff_t offset, loff_t len) {
+      union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
+      return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
+    }
+    #else
+    LSS_INLINE _syscall4(int, fallocate,
+                         int, f, int, mode, loff_t, offset, loff_t, len)
+    #endif
+  #endif
+  #if defined(__NR_newfstatat)
+    LSS_INLINE _syscall4(int, newfstatat,         int,   d,
+                         const char *,            p,
+                         struct kernel_stat*,     b, int, f)
+  #endif
+  #if defined(__x86_64__) || defined(__s390x__)
     LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
                                          gid_t *egid,
                                          gid_t *sgid) {
@@ -2893,10 +3676,6 @@ struct kernel_io_event {
       return LSS_NAME(getresuid)(ruid, euid, suid);
     }
 
-    LSS_INLINE _syscall4(int, newfstatat,         int,   d,
-                         const char *,            p,
-                        struct kernel_stat*,       b, int, f)
-
     LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
       return LSS_NAME(setfsgid)(gid);
     }
@@ -2912,21 +3691,11 @@ struct kernel_io_event {
     LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
       return LSS_NAME(setresuid)(ruid, euid, suid);
     }
-  #endif // defined(__x86_64__) || defined(__PPC__)
-
-  #if defined(__x86_64__) || defined(__PPC64__)
-    LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode,
-                         loff_t, offset, loff_t, len)
-
-    LSS_INLINE _syscall6(void*, mmap,              void*, s,
-                         size_t,                   l, int,               p,
-                         int,                      f, int,               d,
-                         __off64_t,                o)
 
     LSS_INLINE int LSS_NAME(sigaction)(int signum,
                                        const struct kernel_sigaction *act,
                                        struct kernel_sigaction *oldact) {
-    #if defined(__x86_64__)
+      #if defined(__x86_64__)
       /* On x86_64, the kernel requires us to always set our own
        * SA_RESTORER in order to be able to return from a signal handler.
        * This function must have a "magic" signature that the "gdb"
@@ -2938,77 +3707,67 @@ struct kernel_io_event {
         a.sa_restorer = LSS_NAME(restore_rt)();
         return LSS_NAME(rt_sigaction)(signum, &a, oldact,
                                       (KERNEL_NSIG+7)/8);
-      } else {
+      } else
+      #endif
         return LSS_NAME(rt_sigaction)(signum, act, oldact,
                                       (KERNEL_NSIG+7)/8);
-      }
-    #else
-      return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8);
-    #endif
     }
 
     LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
       return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
     }
 
+    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
+      return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
+    }
+  #endif
+  #if defined(__NR_rt_sigprocmask)
     LSS_INLINE int LSS_NAME(sigprocmask)(int how,
                                          const struct kernel_sigset_t *set,
                                          struct kernel_sigset_t *oldset) {
       return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
     }
-
-    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
-      return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
+  #endif
+  #if defined(__NR_rt_sigtimedwait)
+    LSS_INLINE int LSS_NAME(sigtimedwait)(const struct kernel_sigset_t *set,
+                                          siginfo_t *info,
+                                          const struct timespec *timeout) {
+      return LSS_NAME(rt_sigtimedwait)(set, info, timeout, (KERNEL_NSIG+7)/8);
     }
-  #endif /* defined(__x86_64__) || defined(__PPC64__) */
-
-  #if defined(__x86_64__) || \
-      defined(__arm__) || \
-     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
+  #endif
+  #if defined(__NR_wait4)
     LSS_INLINE _syscall4(pid_t, wait4,            pid_t, p,
                          int*,                    s, int,       o,
                         struct kernel_rusage*,     r)
-
-    LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
-      return LSS_NAME(wait4)(pid, status, options, 0);
-    }
   #endif
-  #if defined(__x86_64__)|| \
-      defined(__ARM_ARCH_3__) || defined(__ARM_ARCH_5T__) || \
-      defined(__mips__) || defined(__PPC__)
-    LSS_INLINE _syscall2(int,     setgroups,     size_t,      c,
-                         const gid_t *,  g)
-  #endif
-  #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
-      defined(__PPC__)
+  #if defined(__NR_openat)
     LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
+  #endif
+  #if defined(__NR_unlinkat)
     LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f)
   #endif
-  #if defined(__i386__) || defined(__arm__)
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \
+      (defined(__s390__) && !defined(__s390x__))
     #define __NR__getresgid32 __NR_getresgid32
     #define __NR__getresuid32 __NR_getresuid32
     #define __NR__setfsgid32  __NR_setfsgid32
     #define __NR__setfsuid32  __NR_setfsuid32
-    #define __NR__setgroups32 __NR_setgroups32
-    #define __NR__setgroups   __NR_setgroups
     #define __NR__setresgid32 __NR_setresgid32
     #define __NR__setresuid32 __NR_setresuid32
+#if defined(__ARM_EABI__)
     LSS_INLINE _syscall2(int,   ugetrlimit,        int,          r,
                         struct kernel_rlimit*, l)
+#endif
     LSS_INLINE _syscall3(int,     _getresgid32,    gid_t *,      r,
                          gid_t *,            e,    gid_t *,      s)
     LSS_INLINE _syscall3(int,     _getresuid32,    uid_t *,      r,
                          uid_t *,            e,    uid_t *,      s)
-    LSS_INLINE _syscall1(int,     _setfsgid32,      gid_t,       f)
-    LSS_INLINE _syscall1(int,     _setfsuid32,      uid_t,       f)
-    LSS_INLINE _syscall2(int,     _setgroups32,     int,         s,
-                         const unsigned int *, l)
-    LSS_INLINE _syscall2(int,     _setgroups,       size_t,      c,
-                         const unsigned short *, g)
-    LSS_INLINE _syscall3(int,     _setresgid32,     gid_t,       r,
-                         gid_t,              e,     gid_t,       s)
-    LSS_INLINE _syscall3(int,     _setresuid32,     uid_t,       r,
-                         uid_t,              e,     uid_t,       s)
+    LSS_INLINE _syscall1(int,     _setfsgid32,     gid_t,        f)
+    LSS_INLINE _syscall1(int,     _setfsuid32,     uid_t,        f)
+    LSS_INLINE _syscall3(int,     _setresgid32,    gid_t,        r,
+                         gid_t,              e,    gid_t,        s)
+    LSS_INLINE _syscall3(int,     _setresuid32,    uid_t,        r,
+                         uid_t,              e,    uid_t,        s)
 
     LSS_INLINE int LSS_NAME(getresgid32)(gid_t *rgid,
                                          gid_t *egid,
@@ -3047,7 +3806,7 @@ struct kernel_io_event {
       if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 &&
           LSS_ERRNO == ENOSYS) {
         if ((unsigned int)gid & ~0xFFFFu) {
-          LSS_ERRNO = EINVAL;
+          rc = EINVAL;
         } else {
           rc = LSS_NAME(setfsgid)(gid);
         }
@@ -3060,7 +3819,7 @@ struct kernel_io_event {
       if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 &&
           LSS_ERRNO == ENOSYS) {
         if ((unsigned int)uid & ~0xFFFFu) {
-          LSS_ERRNO = EINVAL;
+          rc = EINVAL;
         } else {
           rc = LSS_NAME(setfsuid)(uid);
         }
@@ -3068,37 +3827,6 @@ struct kernel_io_event {
       return rc;
     }
 
-
-    // We cannot allocate memory so there is a problem with building the
-    // list of groups with the proper datatype.  Older kernels have limits
-    // on the number of groups that can be set at one time of up to 32.
-    // So we have an array on the stack of size 32 where to put the groups.
-    #define LSS_SET_GROUPS_SIZE 32
-    LSS_INLINE int LSS_NAME(setgroups)(size_t size, const unsigned int *list) {
-      int rc = 0;
-      if ((rc = LSS_NAME(_setgroups32)(size, list)) < 0 &&
-          LSS_ERRNO == ENOSYS) {
-        if (size > LSS_SET_GROUPS_SIZE) {
-          LSS_ERRNO = EINVAL;
-        } else {
-          unsigned short gid_list[LSS_SET_GROUPS_SIZE];
-          int i;
-          for (i = 0; i < size; ++i) {
-            if (list[i] & ~0xFFFFu) {
-              LSS_ERRNO = EINVAL;
-              break;
-            }
-            gid_list[i] = list[i];
-          }
-          if (LSS_ERRNO != EINVAL) {
-            rc = LSS_NAME(_setgroups)(size, gid_list);
-          }
-        }
-      }
-      return rc;
-    }
-    #undef LSS_SET_GROUPS_SIZE
-
     LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
       int rc;
       if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 &&
@@ -3106,7 +3834,7 @@ struct kernel_io_event {
         if ((unsigned int)rgid & ~0xFFFFu ||
             (unsigned int)egid & ~0xFFFFu ||
             (unsigned int)sgid & ~0xFFFFu) {
-          LSS_ERRNO = EINVAL;
+          rc = EINVAL;
         } else {
           rc = LSS_NAME(setresgid)(rgid, egid, sgid);
         }
@@ -3121,7 +3849,7 @@ struct kernel_io_event {
         if ((unsigned int)ruid & ~0xFFFFu ||
             (unsigned int)euid & ~0xFFFFu ||
             (unsigned int)suid & ~0xFFFFu) {
-          LSS_ERRNO = EINVAL;
+          rc = EINVAL;
         } else {
           rc = LSS_NAME(setresuid)(ruid, euid, suid);
         }
@@ -3173,34 +3901,41 @@ struct kernel_io_event {
                 (1UL << ((signum - 1) % (8*sizeof(set->sig[0])))));
     }
   }
-  #if defined(__i386__) || \
-      defined(__arm__) || \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \
-     (defined(__PPC__) && !defined(__PPC64__))
+  #if defined(__i386__) ||                                                    \
+      defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) ||                     \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) ||                   \
+      defined(__PPC__) ||                                                     \
+     (defined(__s390__) && !defined(__s390x__))
     #define __NR__sigaction   __NR_sigaction
     #define __NR__sigpending  __NR_sigpending
-    #define __NR__sigprocmask __NR_sigprocmask
     #define __NR__sigsuspend  __NR_sigsuspend
+    #define __NR__socketcall  __NR_socketcall
     LSS_INLINE _syscall2(int, fstat64,             int, f,
                          struct kernel_stat64 *, b)
-    LSS_INLINE _syscall5(int, _llseek,     uint, fd, ulong, hi, ulong, lo,
+    LSS_INLINE _syscall5(int, _llseek,     uint, fd,
+                         unsigned long, hi, unsigned long, lo,
                          loff_t *, res, uint, wh)
-
-    #ifndef __ARM_EABI__
-    /* Not available on ARM EABI Linux.  */
-    LSS_INLINE _syscall1(void*, mmap,              void*, a)
-    #endif
-    LSS_INLINE _syscall6(void*, mmap2,             void*, s,
+#if defined(__s390__) && !defined(__s390x__)
+    /* On s390, mmap2() arguments are passed in memory. */
+    LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d,
+                                      off_t o) {
+      unsigned long buf[6] = { (unsigned long) s, (unsigned long) l,
+                               (unsigned long) p, (unsigned long) f,
+                               (unsigned long) d, (unsigned long) o };
+      LSS_REG(2, buf);
+      LSS_BODY(void*, mmap2, "0"(__r2));
+    }
+#else
+    #define __NR__mmap2 __NR_mmap2
+    LSS_INLINE _syscall6(void*, _mmap2,            void*, s,
                          size_t,                   l, int,               p,
                          int,                      f, int,               d,
                          off_t,                    o)
+#endif
     LSS_INLINE _syscall3(int,   _sigaction,        int,   s,
                          const struct kernel_old_sigaction*,  a,
                          struct kernel_old_sigaction*,        o)
     LSS_INLINE _syscall1(int,   _sigpending, unsigned long*, s)
-    LSS_INLINE _syscall3(int,   _sigprocmask,      int,   h,
-                         const unsigned long*,     s,
-                         unsigned long*,           o)
     #ifdef __PPC__
     LSS_INLINE _syscall1(int, _sigsuspend,         unsigned long, s)
     #else
@@ -3284,23 +4019,6 @@ struct kernel_io_event {
       return rc;
     }
 
-    LSS_INLINE int LSS_NAME(sigprocmask)(int how,
-                                         const struct kernel_sigset_t *set,
-                                         struct kernel_sigset_t *oldset) {
-      int olderrno = LSS_ERRNO;
-      int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
-      if (rc < 0 && LSS_ERRNO == ENOSYS) {
-        LSS_ERRNO = olderrno;
-        if (oldset) {
-          LSS_NAME(sigemptyset)(oldset);
-        }
-        rc = LSS_NAME(_sigprocmask)(how,
-                                    set ? &set->sig[0] : NULL,
-                                    oldset ? &oldset->sig[0] : NULL);
-      }
-      return rc;
-    }
-
     LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
       int olderrno = LSS_ERRNO;
       int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
@@ -3315,57 +4033,93 @@ struct kernel_io_event {
       return rc;
     }
   #endif
+  #if defined(__i386__) ||                                                    \
+      defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) ||                     \
+     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) ||                   \
+      defined(__PPC__) ||                                                     \
+     (defined(__s390__) && !defined(__s390x__))
+    /* On these architectures, implement mmap() with mmap2(). */
+    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
+                                    int64_t o) {
+      if (o % 4096) {
+        LSS_ERRNO = EINVAL;
+        return (void *) -1;
+      }
+      return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096));
+    }
+  #elif defined(__s390x__)
+    /* On s390x, mmap() arguments are passed in memory. */
+    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
+                                    int64_t o) {
+      unsigned long buf[6] = { (unsigned long) s, (unsigned long) l,
+                               (unsigned long) p, (unsigned long) f,
+                               (unsigned long) d, (unsigned long) o };
+      LSS_REG(2, buf);
+      LSS_BODY(void*, mmap, "0"(__r2));
+    }
+  #elif defined(__x86_64__)
+    /* Need to make sure __off64_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
+                                    int64_t o) {
+      LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
+                               LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
+                               LSS_SYSCALL_ARG(d), (uint64_t)(o));
+    }
+  #else
+    /* Remaining 64-bit architectures. */
+    LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot,
+                         int, flags, int, fd, int64_t, offset)
+  #endif
   #if defined(__PPC__)
     #undef LSS_SC_LOADARGS_0
     #define LSS_SC_LOADARGS_0(dummy...)
-    /* arg1 .. arg6 are passed in an unsigned long array pointed by r4.  */
     #undef LSS_SC_LOADARGS_1
     #define LSS_SC_LOADARGS_1(arg1)                                           \
-        sc_args[0] = (unsigned long) (arg1)
+        __sc_4  = (unsigned long) (arg1)
     #undef LSS_SC_LOADARGS_2
     #define LSS_SC_LOADARGS_2(arg1, arg2)                                     \
         LSS_SC_LOADARGS_1(arg1);                                              \
-        sc_args[1] = (unsigned long) (arg2)
+        __sc_5  = (unsigned long) (arg2)
     #undef LSS_SC_LOADARGS_3
     #define LSS_SC_LOADARGS_3(arg1, arg2, arg3)                               \
         LSS_SC_LOADARGS_2(arg1, arg2);                                        \
-        sc_args[2] = (unsigned long) (arg3)
+        __sc_6  = (unsigned long) (arg3)
     #undef LSS_SC_LOADARGS_4
     #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4)                         \
         LSS_SC_LOADARGS_3(arg1, arg2, arg3);                                  \
-        sc_args[3] = (unsigned long) (arg4)
+        __sc_7  = (unsigned long) (arg4)
     #undef LSS_SC_LOADARGS_5
     #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5)                   \
         LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4);                            \
-        sc_args[4] = (unsigned long) (arg5)
-    #undef LSS_SC_LOADARGS_6
-    #define LSS_SC_LOADARGS_6(arg1, arg2, arg3, arg4, arg5, arg6)             \
-        LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5);                      \
-        sc_args[5] = (unsigned long) (arg6)
+        __sc_8  = (unsigned long) (arg5)
     #undef LSS_SC_BODY
-    /*
-     * Do a socket system call using the generic socketcall() interface.
-     * We pack arguments into an array of unsigned longs and then
-     * call socketcall() with a function number and the argument array.
-     * Although some socket calls now have their own syscall numbers,
-     * we still use socketcall() to make our code work with older kernels.
-     */
     #define LSS_SC_BODY(nr, type, opt, args...)                               \
         long __sc_ret, __sc_err;                                              \
         {                                                                     \
-          unsigned long sc_args[6];                                           \
           register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall;     \
           register unsigned long __sc_3 __asm__ ("r3") = opt;                 \
           register unsigned long __sc_4 __asm__ ("r4");                       \
+          register unsigned long __sc_5 __asm__ ("r5");                       \
+          register unsigned long __sc_6 __asm__ ("r6");                       \
+          register unsigned long __sc_7 __asm__ ("r7");                       \
+          register unsigned long __sc_8 __asm__ ("r8");                       \
           LSS_SC_LOADARGS_##nr(args);                                         \
           __asm__ __volatile__                                                \
-              ("sc\n\t"                                                       \
+              ("stwu 1, -48(1)\n\t"                                           \
+               "stw 4, 20(1)\n\t"                                             \
+               "stw 5, 24(1)\n\t"                                             \
+               "stw 6, 28(1)\n\t"                                             \
+               "stw 7, 32(1)\n\t"                                             \
+               "stw 8, 36(1)\n\t"                                             \
+               "addi 4, 1, 20\n\t"                                            \
+               "sc\n\t"                                                       \
                "mfcr %0"                                                      \
-                 : "+r" (__sc_0),                                             \
-                   "+r" (__sc_3), "=r" (__sc_4)                               \
-                 : "2"(&sc_args)                                              \
-                 : "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",         \
-                   "cr0", "ctr", "memory");                                   \
+                 : "=&r" (__sc_0),                                            \
+                   "=&r" (__sc_3), "=&r" (__sc_4),                            \
+                   "=&r" (__sc_5), "=&r" (__sc_6),                            \
+                   "=&r" (__sc_7), "=&r" (__sc_8)                             \
+                 : LSS_ASMINPUT_##nr                                          \
+                 : "cr0", "ctr", "memory");                                   \
           __sc_ret = __sc_3;                                                  \
           __sc_err = __sc_0;                                                  \
         }                                                                     \
@@ -3382,12 +4136,15 @@ struct kernel_io_event {
       LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
     }
 
+    // TODO(csilvers): why is this ifdef'ed out?
+#if 0
     LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
                                         int flags,
                                         const struct kernel_sockaddr *to,
                                         unsigned int tolen) {
-      LSS_SC_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
+      LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
     }
+#endif
 
     LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
       LSS_SC_BODY(2, int, 13, s, how);
@@ -3402,105 +4159,72 @@ struct kernel_io_event {
       LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
     }
   #endif
-  #if defined(__i386__) || \
-      (defined(__arm__) && !defined(__ARM_EABI__)) || \
-      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
-
-    /* See sys_socketcall in net/socket.c in kernel source.
-     * It de-multiplexes on its first arg and unpacks the arglist
-     * array in its second arg.
-     */
-    LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
+  #if defined(__ARM_EABI__) || defined (__aarch64__)
+    LSS_INLINE _syscall3(ssize_t, recvmsg, int, s, struct kernel_msghdr*, msg,
+                         int, flags)
+    LSS_INLINE _syscall3(ssize_t, sendmsg, int, s, const struct kernel_msghdr*,
+                         msg, int, flags)
+    LSS_INLINE _syscall6(ssize_t, sendto, int, s, const void*, buf, size_t,len,
+                         int, flags, const struct kernel_sockaddr*, to,
+                         unsigned int, tolen)
+    LSS_INLINE _syscall2(int, shutdown, int, s, int, how)
+    LSS_INLINE _syscall3(int, socket, int, domain, int, type, int, protocol)
+    LSS_INLINE _syscall4(int, socketpair, int, d, int, type, int, protocol,
+                         int*, sv)
+  #endif
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) ||                  \
+      defined(__s390__)
+    #define __NR__socketcall  __NR_socketcall
+    LSS_INLINE _syscall2(int,      _socketcall,    int,   c,
+                         va_list,                  a)
+    LSS_INLINE int LSS_NAME(socketcall)(int op, ...) {
+      int rc;
+      va_list ap;
+      va_start(ap, op);
+      rc = LSS_NAME(_socketcall)(op, ap);
+      va_end(ap);
+      return rc;
+    }
 
     LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
                                          int flags){
-      unsigned long args[3] = {
-        (unsigned long) s,
-        (unsigned long) msg,
-        (unsigned long) flags
-      };
-      return (ssize_t) LSS_NAME(socketcall)(17, args);
+      return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags);
     }
 
     LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
                                          const struct kernel_msghdr *msg,
                                          int flags) {
-      unsigned long args[3] = {
-        (unsigned long) s,
-        (unsigned long) msg,
-        (unsigned long) flags
-      };
-      return (ssize_t) LSS_NAME(socketcall)(16, args);
+      return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags);
     }
 
     LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
                                         int flags,
                                         const struct kernel_sockaddr *to,
                                         unsigned int tolen) {
-      unsigned long args[6] = {
-        (unsigned long) s,
-        (unsigned long) buf,
-        (unsigned long) len,
-        (unsigned long) flags,
-        (unsigned long) to,
-        (unsigned long) tolen
-      };
-      return (ssize_t) LSS_NAME(socketcall)(11, args);
+      return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen);
     }
 
     LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
-      unsigned long args[2] = {
-        (unsigned long) s,
-        (unsigned long) how
-      };
-      return LSS_NAME(socketcall)(13, args);
+      return LSS_NAME(socketcall)(13, s, how);
     }
 
     LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
-      unsigned long args[3] = {
-        (unsigned long) domain,
-        (unsigned long) type,
-        (unsigned long) protocol
-      };
-      return LSS_NAME(socketcall)(1, args);
+      return LSS_NAME(socketcall)(1, domain, type, protocol);
     }
 
     LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
                                         int sv[2]) {
-      unsigned long args[4] = {
-        (unsigned long) d,
-        (unsigned long) type,
-        (unsigned long) protocol,
-        (unsigned long) sv
-      };
-      return LSS_NAME(socketcall)(8, args);
+      return LSS_NAME(socketcall)(8, d, type, protocol, sv);
     }
-  #elif defined(__ARM_EABI__)
-    /* ARM EABI Linix does not have socketcall.  */
-    LSS_INLINE _syscall3(ssize_t, recvmsg,             int,   s,
-                         struct kernel_msghdr*,     m, int, f)
-    LSS_INLINE _syscall3(ssize_t, sendmsg,            int,   s,
-                         const struct kernel_msghdr*, m, int, f)
-    LSS_INLINE _syscall6(ssize_t, sendto,         int,   s,
-                         const void*,             b, size_t, l,
-                         int,                     f,
-                         const struct kernel_sockaddr*, to,
-                         unsigned int,            tl)
-    LSS_INLINE _syscall2(int, shutdown,           int,   s,
-                         int,                     h)
-    LSS_INLINE _syscall3(int, socket,             int,   d,
-                         int,                     t, int,       p)
-    LSS_INLINE _syscall4(int, socketpair,         int,   d,
-                         int,                     t, int,       p, int*, s)
   #endif
-  #if defined(__i386__) || (defined(__PPC__) && !defined(__PPC64__)) || \
-      defined(__arm__)
+  #if defined(__NR_fstatat64)
     LSS_INLINE _syscall4(int,   fstatat64,        int,   d,
                          const char *,      p,
                          struct kernel_stat64 *,   b,    int,   f)
   #endif
-  #if defined(__i386__) || defined(__PPC__) ||                                \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
+  #if defined(__NR_waitpid)
+    // waitpid is polyfilled below when not available.
     LSS_INLINE _syscall3(pid_t, waitpid,          pid_t, p,
                          int*,              s,    int,   o)
   #endif
@@ -3513,12 +4237,13 @@ struct kernel_io_event {
       register unsigned long __v1 __asm__("$3");
       register unsigned long __r7 __asm__("$7");
       __asm__ __volatile__ ("syscall\n"
-                            : "=&r"(__v0), "=&r"(__v1), "+r" (__r7)
+                            : "=r"(__v0), "=r"(__v1), "=r" (__r7)
                             : "0"(__v0)
                             : "$8", "$9", "$10", "$11", "$12",
-                              "$13", "$14", "$15", "$24", "memory");
+                              "$13", "$14", "$15", "$24", "$25", "memory");
       if (__r7) {
-        LSS_ERRNO = __v0;
+        unsigned long __errnovalue = __v0;
+        LSS_ERRNO = __errnovalue;
         return -1;
       } else {
         p[0] = __v0;
@@ -3526,13 +4251,18 @@ struct kernel_io_event {
         return 0;
       }
     }
-  #else
+  #elif defined(__NR_pipe)
+    // pipe is polyfilled below when not available.
     LSS_INLINE _syscall1(int,     pipe,           int *, p)
   #endif
-  /* TODO(user): see if ppc can/should support this as well              */
-  #if defined(__i386__) || \
-      defined(__arm__) || \
-     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
+  #if defined(__NR_pipe2)
+    LSS_INLINE _syscall2(int, pipe2, int *, pipefd, int, flags)
+  #endif
+  /* TODO(csilvers): see if ppc can/should support this as well              */
+  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+      defined(__ARM_EABI__) ||                                                \
+     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) ||                   \
+     (defined(__s390__) && !defined(__s390x__))
     #define __NR__statfs64  __NR_statfs64
     #define __NR__fstatfs64 __NR_fstatfs64
     LSS_INLINE _syscall3(int, _statfs64,     const char*, p,
@@ -3553,7 +4283,7 @@ struct kernel_io_event {
     return LSS_NAME(execve)(path, argv, (const char *const *)environ);
   }
 
-  LSS_INLINE pid_t LSS_NAME(gettid)() {
+  LSS_INLINE pid_t LSS_NAME(gettid)(void) {
     pid_t tid = LSS_NAME(_gettid)();
     if (tid != -1) {
       return tid;
@@ -3574,41 +4304,54 @@ struct kernel_io_event {
   }
 
   LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) {
-    return LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
+    /* PTRACE_DETACH can sometimes forget to wake up the tracee and it
+     * then sends job control signals to the real parent, rather than to
+     * the tracer. We reduce the risk of this happening by starting a
+     * whole new time slice, and then quickly sending a SIGCONT signal
+     * right after detaching from the tracee.
+     *
+     * We use tkill to ensure that we only issue a wakeup for the thread being
+     * detached.  Large multi threaded apps can take a long time in the kernel
+     * processing SIGCONT.
+     */
+    int rc, err;
+    LSS_NAME(sched_yield)();
+    rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0);
+    err = LSS_ERRNO;
+    LSS_NAME(tkill)(pid, SIGCONT);
+    /* Old systems don't have tkill */
+    if (LSS_ERRNO == ENOSYS)
+      LSS_NAME(kill)(pid, SIGCONT);
+    LSS_ERRNO = err;
+    return rc;
   }
 
   LSS_INLINE int LSS_NAME(raise)(int sig) {
     return LSS_NAME(kill)(LSS_NAME(getpid)(), sig);
   }
 
-  LSS_INLINE int LSS_NAME(setpgrp)() {
+  LSS_INLINE int LSS_NAME(setpgrp)(void) {
     return LSS_NAME(setpgid)(0, 0);
   }
 
-  LSS_INLINE int LSS_NAME(sysconf)(int name) {
-    extern int __getpagesize(void);
-    switch (name) {
-      case _SC_OPEN_MAX: {
-        struct kernel_rlimit limit;
+  #if defined(__x86_64__)
+    /* Need to make sure loff_t isn't truncated to 32-bits under x32.  */
+    LSS_INLINE ssize_t LSS_NAME(pread64)(int f, void *b, size_t c, loff_t o) {
+      LSS_BODY(4, ssize_t, pread64, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(b),
+                                    LSS_SYSCALL_ARG(c), (uint64_t)(o));
+    }
 
-        /* On some systems getrlimit is obsolete, use ugetrlimit instead. */
-  #ifndef __NR_getrlimit
-        return LSS_NAME(ugetrlimit)(RLIMIT_NOFILE, &limit) < 0
-               ? 8192 : limit.rlim_cur;
-  #else
-        return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0
-               ? 8192 : limit.rlim_cur;
-  #endif
-      }
-      case _SC_PAGESIZE:
-        return __getpagesize();
-      default:
-        LSS_ERRNO = ENOSYS;
-        return -1;
+    LSS_INLINE ssize_t LSS_NAME(pwrite64)(int f, const void *b, size_t c,
+                                          loff_t o) {
+      LSS_BODY(4, ssize_t, pwrite64, LSS_SYSCALL_ARG(f), LSS_SYSCALL_ARG(b),
+                                     LSS_SYSCALL_ARG(c), (uint64_t)(o));
     }
-  }
-  #if defined(__x86_64__) ||                                                  \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
+
+    LSS_INLINE int LSS_NAME(readahead)(int f, loff_t o, unsigned c) {
+      LSS_BODY(3, int, readahead, LSS_SYSCALL_ARG(f), (uint64_t)(o),
+                                  LSS_SYSCALL_ARG(c));
+    }
+  #elif defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64
     LSS_INLINE _syscall4(ssize_t, pread64,        int,         f,
                          void *,         b, size_t,   c,
                          loff_t,         o)
@@ -3621,14 +4364,32 @@ struct kernel_io_event {
     #define __NR__pread64   __NR_pread64
     #define __NR__pwrite64  __NR_pwrite64
     #define __NR__readahead __NR_readahead
-    LSS_INLINE _syscall5(ssize_t, _pread64,        int,         f,
-                         void *,         b, size_t, c, unsigned, o1,
-                         unsigned, o2)
-    LSS_INLINE _syscall5(ssize_t, _pwrite64,       int,         f,
-                         const void *,   b, size_t, c, unsigned, o1,
-                         long, o2)
-    LSS_INLINE _syscall4(int, _readahead,          int,         f,
-                         unsigned,       o1, unsigned, o2, size_t, c);
+    #if defined(__ARM_EABI__) || defined(__mips__)
+      /* On ARM and MIPS, a 64-bit parameter has to be in an even-odd register
+       * pair. Hence these calls ignore their fourth argument (r3) so that their
+       * fifth and sixth make such a pair (r4,r5).
+       */
+      #define LSS_LLARG_PAD 0,
+      LSS_INLINE _syscall6(ssize_t, _pread64,        int,         f,
+                           void *,         b, size_t, c,
+                           unsigned, skip, unsigned, o1, unsigned, o2)
+      LSS_INLINE _syscall6(ssize_t, _pwrite64,       int,         f,
+                           const void *,   b, size_t, c,
+                           unsigned, skip, unsigned, o1, unsigned, o2)
+      LSS_INLINE _syscall5(int, _readahead,          int,         f,
+                           unsigned,     skip,
+                           unsigned,       o1, unsigned, o2, size_t, c)
+    #else
+      #define LSS_LLARG_PAD
+      LSS_INLINE _syscall5(ssize_t, _pread64,        int,         f,
+                           void *,         b, size_t, c, unsigned, o1,
+                           unsigned, o2)
+      LSS_INLINE _syscall5(ssize_t, _pwrite64,       int,         f,
+                           const void *,   b, size_t, c, unsigned, o1,
+                           long, o2)
+      LSS_INLINE _syscall4(int, _readahead,          int,         f,
+                           unsigned,       o1, unsigned, o2, size_t, c)
+    #endif
     /* We force 64bit-wide parameters onto the stack, then access each
      * 32-bit component individually. This guarantees that we build the
      * correct parameters independent of the native byte-order of the
@@ -3637,41 +4398,136 @@ struct kernel_io_event {
     LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
                                          loff_t off) {
       union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
+      return LSS_NAME(_pread64)(fd, buf, count,
+                                LSS_LLARG_PAD o.arg[0], o.arg[1]);
     }
     LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
                                           size_t count, loff_t off) {
       union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
+      return LSS_NAME(_pwrite64)(fd, buf, count,
+                                 LSS_LLARG_PAD o.arg[0], o.arg[1]);
     }
     LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
       union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
+      return LSS_NAME(_readahead)(fd, LSS_LLARG_PAD o.arg[0], o.arg[1], len);
     }
   #endif
-  #if defined(__NR_io_setup)
-    LSS_INLINE _syscall2(int, io_setup,
-                         int,                     maxevents,
-                         unsigned long *,         ctxp);
-    LSS_INLINE _syscall3(int, io_submit,
-                         unsigned long,           ctx_id,
-                         long,                    nr,
-                         struct kernel_iocb **,   ios);
-    LSS_INLINE _syscall5(int, io_getevents,
-                         unsigned long,            ctx_id,
-                         long,                     min_nr,
-                         long,                     nr,
-                         struct kernel_io_event *, events,
-                         struct kernel_timespec*,  timeout);
-    LSS_INLINE _syscall1(int, io_destroy,
-                         unsigned long,            ctx);
-    LSS_INLINE _syscall3(int, io_cancel,
-                         unsigned long,            ctx_id,
-                         struct kernel_iocb*,      iocb,
-                         struct kernel_io_event*,  result);
-  #endif
 #endif
 
+/*
+ * Polyfills for deprecated syscalls.
+ */
+
+#if !defined(__NR_dup2)
+  LSS_INLINE int LSS_NAME(dup2)(int s, int d) {
+    return LSS_NAME(dup3)(s, d, 0);
+  }
+#endif
+
+#if !defined(__NR_open)
+  LSS_INLINE int LSS_NAME(open)(const char *pathname, int flags, int mode) {
+    return LSS_NAME(openat)(AT_FDCWD, pathname, flags, mode);
+  }
+#endif
+
+#if !defined(__NR_unlink)
+  LSS_INLINE int LSS_NAME(unlink)(const char *pathname) {
+    return LSS_NAME(unlinkat)(AT_FDCWD, pathname, 0);
+  }
+#endif
+
+#if !defined(__NR_readlink)
+  LSS_INLINE int LSS_NAME(readlink)(const char *pathname, char *buffer,
+                                    size_t size) {
+    return LSS_NAME(readlinkat)(AT_FDCWD, pathname, buffer, size);
+  }
+#endif
+
+#if !defined(__NR_pipe)
+  LSS_INLINE int LSS_NAME(pipe)(int *pipefd) {
+    return LSS_NAME(pipe2)(pipefd, 0);
+  }
+#endif
+
+#if !defined(__NR_poll)
+  LSS_INLINE int LSS_NAME(poll)(struct kernel_pollfd *fds, unsigned int nfds,
+                                int timeout) {
+   struct kernel_timespec timeout_ts;
+   struct kernel_timespec *timeout_ts_p = NULL;
+
+    if (timeout >= 0) {
+      timeout_ts.tv_sec = timeout / 1000;
+      timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
+      timeout_ts_p = &timeout_ts;
+    }
+    return LSS_NAME(ppoll)(fds, nfds, timeout_ts_p, NULL, 0);
+  }
+#endif
+
+#if !defined(__NR_stat)
+  LSS_INLINE int LSS_NAME(stat)(const char *pathname,
+                                struct kernel_stat *buf) {
+    return LSS_NAME(newfstatat)(AT_FDCWD, pathname, buf, 0);
+  }
+#endif
+
+#if !defined(__NR_waitpid)
+  LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options) {
+    return LSS_NAME(wait4)(pid, status, options, 0);
+  }
+#endif
+
+#if !defined(__NR_fork)
+// TODO: define this in an arch-independent way instead of inlining the clone
+//       syscall body.
+
+# if defined(__aarch64__)
+  LSS_INLINE pid_t LSS_NAME(fork)(void) {
+    // No fork syscall on aarch64 - implement by means of the clone syscall.
+    // Note that this does not reset glibc's cached view of the PID/TID, so
+    // some glibc interfaces might go wrong in the forked subprocess.
+    int flags = SIGCHLD;
+    void *child_stack = NULL;
+    void *parent_tidptr = NULL;
+    void *newtls = NULL;
+    void *child_tidptr = NULL;
+
+    LSS_REG(0, flags);
+    LSS_REG(1, child_stack);
+    LSS_REG(2, parent_tidptr);
+    LSS_REG(3, newtls);
+    LSS_REG(4, child_tidptr);
+    LSS_BODY(pid_t, clone, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),
+             "r"(__r4));
+  }
+# elif defined(__x86_64__)
+  LSS_INLINE pid_t LSS_NAME(fork)(void) {
+    // Android disallows the fork syscall on x86_64 - implement by means of the
+    // clone syscall as above for aarch64.
+    int flags = SIGCHLD;
+    void *child_stack = NULL;
+    void *parent_tidptr = NULL;
+    void *newtls = NULL;
+    void *child_tidptr = NULL;
+
+    LSS_BODY(5, pid_t, clone, LSS_SYSCALL_ARG(flags),
+             LSS_SYSCALL_ARG(child_stack), LSS_SYSCALL_ARG(parent_tidptr),
+             LSS_SYSCALL_ARG(newtls), LSS_SYSCALL_ARG(child_tidptr));
+  }
+# else
+#  error missing fork polyfill for this architecture
+# endif
+#endif
+
+/* These restore the original values of these macros saved by the
+ * corresponding #pragma push_macro near the top of this file. */
+#pragma pop_macro("stat64")
+#pragma pop_macro("fstat64")
+#pragma pop_macro("lstat64")
+#pragma pop_macro("pread64")
+#pragma pop_macro("pwrite64")
+#pragma pop_macro("getdents64")
+
 #if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
 }
 #endif
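
The call-site hunks that follow all adapt to the six-argument sys_futex()
wrapper generated by the bumped header (uaddr, op, val, timeout, uaddr2,
val3). Below is a minimal sketch of that calling convention only; it is not
part of the patch, the helper name is illustrative, and the include path is
assumed from the file layout shown in this change:

    // Sketch only (not part of this patch): invoking the six-argument
    // sys_futex() wrapper generated by the bumped header.
    #include <climits>                             // INT_MAX
    #include <linux/futex.h>                       // FUTEX_WAKE, FUTEX_PRIVATE_FLAG
    #include "kudu/gutil/linux_syscall_support.h"  // sys_futex

    // Wake every waiter on 'addr'. For FUTEX_WAKE the timeout, uaddr2 and
    // val3 parameters are unused, so they are passed as nullptr/nullptr/0,
    // matching the updated call sites in the hunks below.
    static void wake_all(int* addr) {
      sys_futex(addr, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT_MAX,
                nullptr, nullptr, 0);
    }
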
diff --git a/src/kudu/gutil/spinlock_linux-inl.h b/src/kudu/gutil/spinlock_linux-inl.h
index 41ef03a..54e16a7 100644
--- a/src/kudu/gutil/spinlock_linux-inl.h
+++ b/src/kudu/gutil/spinlock_linux-inl.h
@@ -51,15 +51,10 @@ static struct InitModule {
     int x = 0;
     // futexes are ints, so we can use them only when
     // that's the same size as the lockword_ in SpinLock.
-#ifdef __arm__
-    // ARM linux doesn't support sys_futex1(void*, int, int, struct timespec*);
-    have_futex = 0;
-#else
     have_futex = (sizeof (Atomic32) == sizeof (int) &&
-                  sys_futex(&x, FUTEX_WAKE, 1, 0) >= 0);
-#endif
+                  sys_futex(&x, FUTEX_WAKE, 1, nullptr, nullptr, 0) >= 0);
     if (have_futex &&
-        sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, 0) < 0) {
+        sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, nullptr, nullptr, 0) < 0) {
       futex_private_flag = 0;
     }
   }
@@ -85,9 +80,10 @@ void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
       tm.tv_nsec *= 16;  // increase the delay; we expect explicit wakeups
       sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
                 FUTEX_WAIT | futex_private_flag,
-                value, reinterpret_cast<struct kernel_timespec *>(&tm));
+                value, reinterpret_cast<struct kernel_timespec *>(&tm),
+                nullptr, 0);
     } else {
-      nanosleep(&tm, NULL);
+      nanosleep(&tm, nullptr);
     }
     errno = save_errno;
   }
@@ -96,9 +92,11 @@ void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
 void SpinLockWake(volatile Atomic32 *w, bool all) {
   if (have_futex) {
     sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
-              FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1, 0);
+              FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1,
+              nullptr, nullptr, 0);
   }
 }
 
 } // namespace internal
 } // namespace base
+
diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc
index 03556d6..7bcb787 100644
--- a/src/kudu/util/debug-util.cc
+++ b/src/kudu/util/debug-util.cc
@@ -163,6 +163,7 @@ class CompletionFlag {
     sys_futex(reinterpret_cast<int32_t*>(&complete_),
               FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
               INT_MAX, // wake all
+              nullptr, nullptr,
               0 /* ignored */);
 #endif
   }
@@ -181,7 +182,7 @@ class CompletionFlag {
       sys_futex(reinterpret_cast<int32_t*>(&complete_),
                 FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
                 0, // wait if value is still 0
-                reinterpret_cast<struct kernel_timespec *>(&ts));
+                reinterpret_cast<struct kernel_timespec *>(&ts), nullptr, 0);
 #else
       sched_yield();
 #endif
diff --git a/src/kudu/util/notification.h b/src/kudu/util/notification.h
index b2d523d..968afbb 100644
--- a/src/kudu/util/notification.h
+++ b/src/kudu/util/notification.h
@@ -85,7 +85,7 @@ class Notification {
       if (s == NOTIFIED) return;
       DCHECK_EQ(s, NOT_NOTIFIED_HAS_WAITERS);
       sys_futex(&state_, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, NOT_NOTIFIED_HAS_WAITERS,
-          /* timeout */ nullptr);
+          /* timeout */ nullptr, nullptr /* ignored */, 0 /* ignored */);
     }
   }
 
@@ -94,7 +94,7 @@ class Notification {
     DCHECK_NE(s, NOTIFIED) << "may only notify once";
     if (s == NOT_NOTIFIED_HAS_WAITERS) {
       sys_futex(&state_, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, INT_MAX,
-          nullptr /* ignored */);
+          nullptr /* ignored */, nullptr /* ignored */, 0 /* ignored */);
     }
   }
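
The wait side passes its timeout through the fourth parameter, as in the
debug-util.cc hunk above. A hedged sketch under the same assumptions as the
earlier example (the helper name and the 500 ms value are illustrative only):

    // Sketch only: wait while '*addr' still equals 'expected', up to the
    // given timeout; callers re-check the value on return. kernel_timespec
    // is declared by linux_syscall_support.h.
    static void wait_with_timeout(int* addr, int expected) {
      struct kernel_timespec ts = { 0, 500 * 1000 * 1000 };  // 500 ms
      sys_futex(addr, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, expected,
                &ts, nullptr, 0);
    }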