From 12cbb469ea5e559b241f6c31693554829e8b8400 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Dec 2007 20:24:44 +0100 Subject: [PATCH] syslet: update to -v7 as posted by Zach Signed-off-by: Jens Axboe --- Makefile | 2 +- engines/syslet-rw.c | 307 +++++++++++++++----------------------------- fio.h | 15 --- indirect.h | 40 ++++++ syslet.h | 181 ++++++-------------------- 5 files changed, 185 insertions(+), 360 deletions(-) create mode 100644 indirect.h diff --git a/Makefile b/Makefile index 2acc918d..aceee90a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ #CC = /opt/intel/cce/9.1.045/bin/icc -CC = gcc -W +CC = gcc DEBUGFLAGS = -D_FORTIFY_SOURCE=2 OPTFLAGS= -O2 -g $(EXTFLAGS) CFLAGS = -Wwrite-strings -Wall -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 $(OPTFLAGS) $(DEBUGFLAGS) -rdynamic diff --git a/engines/syslet-rw.c b/engines/syslet-rw.c index 2a159d08..8a1ab4f0 100644 --- a/engines/syslet-rw.c +++ b/engines/syslet-rw.c @@ -10,9 +10,12 @@ #include #include #include +#include #include #include "../fio.h" +#include "../indirect.h" +#include "../syslet.h" #ifdef FIO_HAVE_SYSLET @@ -24,78 +27,67 @@ #define __NR_fio_pwrite __NR_pwrite #endif -#define ATOM_TO_IOU(p) ((struct io_u *) (unsigned long) (p)) - struct syslet_data { struct io_u **events; unsigned int nr_events; - struct async_head_user ahu; - struct syslet_uatom **ring; - - struct syslet_uatom *head, *tail; + struct syslet_ring *ring; + void *stack; }; -static void fio_syslet_complete_atom(struct thread_data *td, - struct syslet_uatom *atom) +static void fio_syslet_complete(struct thread_data *td, struct io_u *io_u) { struct syslet_data *sd = td->io_ops->data; - struct syslet_uatom *last; - struct io_u *io_u; - - /* - * complete from the beginning of the sequence up to (and - * including) this atom - */ - last = atom; - io_u = ATOM_TO_IOU(atom); - atom = io_u->req.head; - - /* - * now complete in right order - */ - do { - long ret; - io_u = ATOM_TO_IOU(atom); - ret = *(long *) (unsigned 
long) atom->ret_ptr; - if (ret >= 0) - io_u->resid = io_u->xfer_buflen - ret; - else if (ret < 0) - io_u->error = ret; - - assert(sd->nr_events < td->o.iodepth); - sd->events[sd->nr_events++] = io_u; + assert(sd->nr_events < td->o.iodepth); + sd->events[sd->nr_events++] = io_u; +} - if (atom == last) - break; +static void syslet_complete_nr(struct thread_data *td, unsigned int nr) +{ + struct syslet_data *sd = td->io_ops->data; + unsigned int i; - atom = (struct syslet_uatom *) (unsigned long) atom->next; - } while (1); + for (i = 0; i < nr; i++) { + unsigned int idx = (i + sd->ring->user_tail) % td->o.iodepth; + struct syslet_completion *comp = &sd->ring->comp[idx]; + struct io_u *io_u = (struct io_u *) (long) comp->caller_data; - assert(!last->next); + io_u->resid = io_u->xfer_buflen - comp->status; + fio_syslet_complete(td, io_u); + } } -/* - * Inspect the ring to see if we have completed events - */ -static void fio_syslet_complete(struct thread_data *td) + +static void fio_syslet_wait_for_events(struct thread_data *td) { struct syslet_data *sd = td->io_ops->data; + struct syslet_ring *ring = sd->ring; + unsigned int events; + events = 0; do { - struct syslet_uatom *atom; + unsigned int kh = ring->kernel_head; + int ret; - atom = sd->ring[sd->ahu.user_ring_idx]; - if (!atom) - break; + /* + * first reap events that are already completed + */ + if (ring->user_tail != kh) { + unsigned int nr = kh - ring->user_tail; - sd->ring[sd->ahu.user_ring_idx] = NULL; - if (++sd->ahu.user_ring_idx == td->o.iodepth) - sd->ahu.user_ring_idx = 0; + syslet_complete_nr(td, nr); + events += nr; + ring->user_tail = kh; + continue; + } - fio_syslet_complete_atom(td, atom); - } while (1); + /* + * block waiting for at least one event + */ + ret = syscall(__NR_syslet_ring_wait, ring, ring->user_tail); + assert(!ret); + } while (!events); } static int fio_syslet_getevents(struct thread_data *td, int min, @@ -106,20 +98,13 @@ static int fio_syslet_getevents(struct thread_data *td, int 
min, long ret; do { - fio_syslet_complete(td); - /* * do we have enough immediate completions? */ if (sd->nr_events >= (unsigned int) min) break; - /* - * OK, we need to wait for some events... - */ - ret = async_wait(1, sd->ahu.user_ring_idx, &sd->ahu); - if (ret < 0) - return -errno; + fio_syslet_wait_for_events(td); } while (1); ret = sd->nr_events; @@ -134,35 +119,16 @@ static struct io_u *fio_syslet_event(struct thread_data *td, int event) return sd->events[event]; } -static void init_atom(struct syslet_uatom *atom, int nr, void *arg0, - void *arg1, void *arg2, void *arg3, void *ret_ptr, - unsigned long flags, void *priv) -{ - atom->flags = flags; - atom->nr = nr; - atom->ret_ptr = (uint64_t) (unsigned long) ret_ptr; - atom->next = 0; - atom->arg_ptr[0] = (uint64_t) (unsigned long) arg0; - atom->arg_ptr[1] = (uint64_t) (unsigned long) arg1; - atom->arg_ptr[2] = (uint64_t) (unsigned long) arg2; - atom->arg_ptr[3] = (uint64_t) (unsigned long) arg3; - atom->arg_ptr[4] = 0; - atom->arg_ptr[5] = 0; - atom->private = (uint64_t) (unsigned long) priv; -} - -/* - * Use seek atom for sync - */ -static void fio_syslet_prep_sync(struct io_u *io_u, struct fio_file *f) +static void fio_syslet_prep_sync(struct fio_file *f, + struct indirect_registers *regs) { - init_atom(&io_u->req.atom, __NR_fsync, &f->fd, NULL, NULL, NULL, - &io_u->req.ret, 0, io_u); + FILL_IN(*regs, __NR_fsync, (long) f->fd); } -static void fio_syslet_prep_rw(struct io_u *io_u, struct fio_file *f) +static void fio_syslet_prep_rw(struct io_u *io_u, struct fio_file *f, + struct indirect_registers *regs) { - int nr; + long nr; /* * prepare rw @@ -172,139 +138,74 @@ static void fio_syslet_prep_rw(struct io_u *io_u, struct fio_file *f) else nr = __NR_fio_pwrite; - init_atom(&io_u->req.atom, nr, &f->fd, &io_u->xfer_buf, - &io_u->xfer_buflen, &io_u->offset, &io_u->req.ret, 0, io_u); + FILL_IN(*regs, nr, (long) f->fd, (long) io_u->xfer_buf, + (long) io_u->xfer_buflen, (long) io_u->offset); } -static int 
fio_syslet_prep(struct thread_data fio_unused *td, struct io_u *io_u) +static void fio_syslet_prep(struct io_u *io_u, struct indirect_registers *regs) { struct fio_file *f = io_u->file; if (io_u->ddir == DDIR_SYNC) - fio_syslet_prep_sync(io_u, f); + fio_syslet_prep_sync(f, regs); else - fio_syslet_prep_rw(io_u, f); - - return 0; -} - -static void cachemiss_thread_start(void) -{ - while (1) - async_thread(NULL, NULL); -} - -#define THREAD_STACK_SIZE (16384) - -static unsigned long thread_stack_alloc() -{ - return (unsigned long) malloc(THREAD_STACK_SIZE) + THREAD_STACK_SIZE; -} - -static void fio_syslet_queued(struct thread_data *td, struct syslet_data *sd) -{ - struct syslet_uatom *atom; - struct timeval now; - - fio_gettime(&now, NULL); - - atom = sd->head; - while (atom) { - struct io_u *io_u = ATOM_TO_IOU(atom); - - memcpy(&io_u->issue_time, &now, sizeof(now)); - io_u_queued(td, io_u); - atom = (struct syslet_uatom *) (unsigned long) atom->next; - } + fio_syslet_prep_rw(io_u, f, regs); } -static int fio_syslet_commit(struct thread_data *td) +static void ret_func(void) { - struct syslet_data *sd = td->io_ops->data; - struct syslet_uatom *done; - - if (!sd->head) - return 0; - - assert(!sd->tail->next); - - if (!sd->ahu.new_thread_stack) - sd->ahu.new_thread_stack = thread_stack_alloc(); - - fio_syslet_queued(td, sd); - - /* - * On sync completion, the atom is returned. So on NULL return - * it's queued asynchronously. 
- */ - done = async_exec(sd->head, &sd->ahu); - - if (done == (void *) -1) { - log_err("fio: syslets don't appear to work\n"); - return -1; - } - - sd->head = sd->tail = NULL; - - if (done) - fio_syslet_complete_atom(td, done); - - return 0; + syscall(__NR_exit); } static int fio_syslet_queue(struct thread_data *td, struct io_u *io_u) { struct syslet_data *sd = td->io_ops->data; + union indirect_params params; + struct indirect_registers regs; + int ret; fio_ro_check(td, io_u); - if (sd->tail) { - sd->tail->next = (uint64_t) (unsigned long) &io_u->req.atom; - sd->tail = &io_u->req.atom; - } else - sd->head = sd->tail = (struct syslet_uatom *)&io_u->req.atom; - - io_u->req.head = sd->head; - return FIO_Q_QUEUED; -} - -static int async_head_init(struct syslet_data *sd, unsigned int depth) -{ - unsigned long ring_size; + memset(&params, 0, sizeof(params)); + fill_syslet_args(&params.syslet, sd->ring, (long)io_u, ret_func, sd->stack); - memset(&sd->ahu, 0, sizeof(struct async_head_user)); + fio_syslet_prep(io_u, &regs); - ring_size = sizeof(struct syslet_uatom *) * depth; - sd->ring = malloc(ring_size); - memset(sd->ring, 0, ring_size); - - sd->ahu.user_ring_idx = 0; - sd->ahu.completion_ring_ptr = (uint64_t) (unsigned long) sd->ring; - sd->ahu.ring_size_bytes = ring_size; - sd->ahu.head_stack = thread_stack_alloc(); - sd->ahu.head_ip = (uint64_t) (unsigned long) cachemiss_thread_start; - sd->ahu.new_thread_ip = (uint64_t) (unsigned long) cachemiss_thread_start; - sd->ahu.new_thread_stack = thread_stack_alloc(); - - return 0; -} + ret = syscall(__NR_indirect, &regs, &params, sizeof(params), 0); + if (ret == (int) io_u->xfer_buflen) { + /* + * completed sync, account. this also catches fsync(). 
+ */ + return FIO_Q_COMPLETED; + } else if (ret < 0) { + /* + * queued for async execution + */ + if (errno == ESYSLETPENDING) + return FIO_Q_QUEUED; + } -static void async_head_exit(struct syslet_data *sd) -{ - free(sd->ring); + io_u->error = errno; + td_verror(td, io_u->error, "xfer"); + return FIO_Q_COMPLETED; } static int check_syslet_support(struct syslet_data *sd) { - struct syslet_uatom atom; - void *ret; + union indirect_params params; + struct indirect_registers regs; + pid_t pid, my_pid = getpid(); - init_atom(&atom, __NR_getpid, NULL, NULL, NULL, NULL, NULL, 0, NULL); - ret = async_exec(&atom, &sd->ahu); - if (ret == (void *) -1) - return 1; + memset(&params, 0, sizeof(params)); + fill_syslet_args(&params.syslet, sd->ring, 0, ret_func, sd->stack); - return 0; + FILL_IN(regs, __NR_getpid); + + pid = syscall(__NR_indirect, &regs, &params, sizeof(params), 0); + if (pid == my_pid) + return 0; + + return 1; } static void fio_syslet_cleanup(struct thread_data *td) @@ -312,8 +213,9 @@ static void fio_syslet_cleanup(struct thread_data *td) struct syslet_data *sd = td->io_ops->data; if (sd) { - async_head_exit(sd); free(sd->events); + free(sd->ring); + free(sd->stack); free(sd); td->io_ops->data = NULL; } @@ -322,24 +224,29 @@ static void fio_syslet_cleanup(struct thread_data *td) static int fio_syslet_init(struct thread_data *td) { struct syslet_data *sd; + void *ring, *stack; sd = malloc(sizeof(*sd)); memset(sd, 0, sizeof(*sd)); + sd->events = malloc(sizeof(struct io_u *) * td->o.iodepth); memset(sd->events, 0, sizeof(struct io_u *) * td->o.iodepth); - - /* - * This will handily fail for kernels where syslet isn't available - */ - if (async_head_init(sd, td->o.iodepth)) { - free(sd->events); - free(sd); + if (posix_memalign(&ring, sizeof(uint64_t), sizeof(struct syslet_ring))) return 1; - } + if (posix_memalign(&stack, page_size, page_size)) + return 1; + + sd->ring = ring; + sd->stack = stack; + + memset(sd->ring, 0, sizeof(*sd->ring)); + sd->ring->elements = td->o.iodepth; 
if (check_syslet_support(sd)) { log_err("fio: syslets do not appear to work\n"); free(sd->events); + free(sd->ring); + free(sd->stack); free(sd); return 1; } @@ -352,9 +259,7 @@ static struct ioengine_ops ioengine = { .name = "syslet-rw", .version = FIO_IOOPS_VERSION, .init = fio_syslet_init, - .prep = fio_syslet_prep, .queue = fio_syslet_queue, - .commit = fio_syslet_commit, .getevents = fio_syslet_getevents, .event = fio_syslet_event, .cleanup = fio_syslet_cleanup, diff --git a/fio.h b/fio.h index 2e43473f..7a0f56d3 100644 --- a/fio.h +++ b/fio.h @@ -23,10 +23,6 @@ #include "os/os.h" #include "mutex.h" -#ifdef FIO_HAVE_SYSLET -#include "syslet.h" -#endif - #ifdef FIO_HAVE_GUASI #include #endif @@ -99,14 +95,6 @@ struct io_piece { }; }; -#ifdef FIO_HAVE_SYSLET -struct syslet_req { - struct syslet_uatom atom; /* the atom to submit */ - struct syslet_uatom *head; /* head of the sequence */ - long ret; /* syscall return value */ -}; -#endif - enum { IO_U_F_FREE = 1 << 0, IO_U_F_FLIGHT = 1 << 1, @@ -128,9 +116,6 @@ struct io_u { #ifdef FIO_HAVE_SGIO struct sg_io_hdr hdr; #endif -#ifdef FIO_HAVE_SYSLET - struct syslet_req req; -#endif #ifdef FIO_HAVE_GUASI guasi_req_t greq; #endif diff --git a/indirect.h b/indirect.h new file mode 100644 index 00000000..fba6b6be --- /dev/null +++ b/indirect.h @@ -0,0 +1,40 @@ +#ifndef _INDIRECT_H_ +#define _INDIRECT_H_ + +#include "syslet.h" + +union indirect_params { + struct { + u32 flags; + } file_flags; + struct syslet_args syslet; +}; + +#ifdef __x86_64__ +# define __NR_indirect 286 +struct indirect_registers { + u64 rax; + u64 rdi; + u64 rsi; + u64 rdx; + u64 r10; + u64 r8; + u64 r9; +}; +#elif defined __i386__ +# define __NR_indirect 325 +struct indirect_registers { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; +}; +#endif + +#define FILL_IN(var, values...) 
\ + (var) = (struct indirect_registers) { values, } + +#endif diff --git a/syslet.h b/syslet.h index ded5c4a7..16f81d2f 100644 --- a/syslet.h +++ b/syslet.h @@ -1,155 +1,50 @@ -#ifndef _LINUX_SYSLET_H -#define _LINUX_SYSLET_H -/* - * The syslet subsystem - asynchronous syscall execution support. - * - * Started by Ingo Molnar: - * - * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar - * - * User-space API/ABI definitions: - */ +#ifndef _SYSLET_H_ +#define _SYSLET_H_ -#ifndef __user -# define __user -#endif +#include "kcompat.h" -/* - * This is the 'Syslet Atom' - the basic unit of execution - * within the syslet framework. A syslet always represents - * a single system-call plus its arguments, plus has conditions - * attached to it that allows the construction of larger - * programs from these atoms. User-space variables can be used - * (for example a loop index) via the special sys_umem*() syscalls. - * - * Arguments are implemented via pointers to arguments. This not - * only increases the flexibility of syslet atoms (multiple syslets - * can share the same variable for example), but is also an - * optimization: copy_uatom() will only fetch syscall parameters - * up until the point it meets the first NULL pointer. 50% of all - * syscalls have 2 or less parameters (and 90% of all syscalls have - * 4 or less parameters). - * - * [ Note: since the argument array is at the end of the atom, and the - * kernel will not touch any argument beyond the final NULL one, atoms - * might be packed more tightly. (the only special case exception to - * this rule would be SKIP_TO_NEXT_ON_STOP atoms, where the kernel will - * jump a full syslet_uatom number of bytes.) 
] - */ -struct syslet_uatom { - uint32_t flags; - uint32_t nr; - uint64_t ret_ptr; - uint64_t next; - uint64_t arg_ptr[6]; - /* - * User-space can put anything in here, kernel will not - * touch it: - */ - uint64_t private; +struct syslet_frame { + u64 ip; + u64 sp; }; -/* - * Flags to modify/control syslet atom behavior: - */ - -/* - * Immediately queue this syslet asynchronously - do not even - * attempt to execute it synchronously in the user context: - */ -#define SYSLET_ASYNC 0x00000001 - -/* - * Never queue this syslet asynchronously - even if synchronous - * execution causes a context-switching: - */ -#define SYSLET_SYNC 0x00000002 - -/* - * Do not queue the syslet in the completion ring when done. - * - * ( the default is that the final atom of a syslet is queued - * in the completion ring. ) - * - * Some syscalls generate implicit completion events of their - * own. - */ -#define SYSLET_NO_COMPLETE 0x00000004 - -/* - * Execution control: conditions upon the return code - * of the just executed syslet atom. 'Stop' means syslet - * execution is stopped and the atom is put into the - * completion ring: - */ -#define SYSLET_STOP_ON_NONZERO 0x00000008 -#define SYSLET_STOP_ON_ZERO 0x00000010 -#define SYSLET_STOP_ON_NEGATIVE 0x00000020 -#define SYSLET_STOP_ON_NON_POSITIVE 0x00000040 - -#define SYSLET_STOP_MASK \ - ( SYSLET_STOP_ON_NONZERO | \ - SYSLET_STOP_ON_ZERO | \ - SYSLET_STOP_ON_NEGATIVE | \ - SYSLET_STOP_ON_NON_POSITIVE ) +struct syslet_args { + u64 ring_ptr; + u64 caller_data; + struct syslet_frame frame; +}; -/* - * Special modifier to 'stop' handling: instead of stopping the - * execution of the syslet, the linearly next syslet is executed. - * (Normal execution flows along atom->next, and execution stops - * if atom->next is NULL or a stop condition becomes true.) - * - * This is what allows true branches of execution within syslets. 
- */ -#define SYSLET_SKIP_TO_NEXT_ON_STOP 0x00000080 +struct syslet_completion { + u64 status; + u64 caller_data; +}; -/* - * This is the (per-user-context) descriptor of the async completion - * ring. This gets passed in to sys_async_exec(): - */ -struct async_head_user { - /* - * Current completion ring index - managed by the kernel: - */ - uint64_t kernel_ring_idx; - /* - * User-side ring index: - */ - uint64_t user_ring_idx; +struct syslet_ring { + u32 kernel_head; + u32 user_tail; + u32 elements; + u32 wait_group; + struct syslet_completion comp[0]; +}; - /* - * Ring of pointers to completed async syslets (i.e. syslets that - * generated a cachemiss and went async, returning -EASYNCSYSLET - * to the user context by sys_async_exec()) are queued here. - * Syslets that were executed synchronously (cached) are not - * queued here. - * - * Note: the final atom that generated the exit condition is - * queued here. Normally this would be the last atom of a syslet. - */ - uint64_t completion_ring_ptr; +#ifdef __x86_64__ +#define __NR_syslet_ring_wait 287 +#elif defined __i386__ +#define __NR_syslet_ring_wait 326 +#endif - /* - * Ring size in bytes: - */ - uint64_t ring_size_bytes; +#define ESYSLETPENDING 132 - /* - * The head task can become a cachemiss thread later on - * too, if it blocks - so it needs its separate thread - * stack and start address too: - */ - uint64_t head_stack; - uint64_t head_ip; +typedef void (*syslet_return_func_t)(void); - /* - * Newly started async kernel threads will take their - * user stack and user start address from here. User-space - * code has to check for new_thread_stack going to NULL - * and has to refill it with a new stack if that happens. 
- */ - uint64_t new_thread_stack; - uint64_t new_thread_ip; -}; +void fill_syslet_args(struct syslet_args *args, struct syslet_ring *ring, + uint64_t caller_data, syslet_return_func_t func, + void *stack) +{ + args->ring_ptr = (u64)(unsigned long)ring; + args->caller_data = caller_data; + args->frame.ip = (u64)(unsigned long)func; + args->frame.sp = (u64)(unsigned long)stack; +} #endif -- 2.25.1