#include <unistd.h>
#include <errno.h>
#include <assert.h>
+#include <malloc.h>
#include <asm/unistd.h>
#include "../fio.h"
-#include "../os.h"
+#include "../lib/fls.h"
#ifdef FIO_HAVE_SYSLET
/*
 * Select the syscall numbers used for positional reads and writes: the
 * 64-bit variants when the headers define __NR_pread64, otherwise the
 * plain __NR_pread/__NR_pwrite.
 */
+#ifdef __NR_pread64
+#define __NR_fio_pread __NR_pread64
+#define __NR_fio_pwrite __NR_pwrite64
+#else
+#define __NR_fio_pread __NR_pread
+#define __NR_fio_pwrite __NR_pwrite
+#endif
+
/*
 * Private state of the syslet engine; fio_syslet_init() stores it in
 * td->io_ops->data.
 */
struct syslet_data {
/* io_us collected from the ring; allocated with td->o.iodepth slots */
struct io_u **events;
/* number of valid entries currently held in events[] */
unsigned int nr_events;
- struct async_head_user ahu;
- struct syslet_uatom **ring;
-
- struct syslet_uatom *head, *tail;
/* completion ring, allocated in fio_syslet_init() */
+ struct syslet_ring *ring;
/* ring element count minus one; used to wrap indices into the ring */
+ unsigned int ring_mask;
/* page_size-sized, page-aligned buffer handed to fill_syslet_args() */
+ void *stack;
};
-static void fio_syslet_complete_atom(struct thread_data *td,
- struct syslet_uatom *atom)
/*
 * Append one io_u to the engine's events[] array and bump nr_events.
 * Asserts that fewer than td->o.iodepth events are already stored.
 */
+static void fio_syslet_add_event(struct thread_data *td, struct io_u *io_u)
{
struct syslet_data *sd = td->io_ops->data;
- struct syslet_uatom *last;
- struct io_u *io_u;
-
- /*
- * complete from the beginning of the sequence up to (and
- * including) this atom
- */
- last = atom;
- io_u = atom->private;
- atom = io_u->req.head;
-
- /*
- * now complete in right order
- */
- do {
- long ret;
-
- io_u = atom->private;
- ret = *atom->ret_ptr;
- if (ret >= 0)
- io_u->resid = io_u->xfer_buflen - ret;
- else if (ret < 0)
- io_u->error = ret;
-
- assert(sd->nr_events < td->iodepth);
- sd->events[sd->nr_events++] = io_u;
-
- if (atom == last)
- break;
-
- atom = atom->next;
- } while (1);
- assert(!last->next);
+ assert(sd->nr_events < td->o.iodepth);
+ sd->events[sd->nr_events++] = io_u;
}
-/*
- * Inspect the ring to see if we have completed events
- */
-static void fio_syslet_complete(struct thread_data *td)
/*
 * Convert nr completion entries, starting at the ring's user_tail, into
 * fio events: each entry's status is translated into io_u->resid and
 * io_u->error, then the io_u is stored via fio_syslet_add_event().
 * This function does not advance user_tail itself.
 */
+static void fio_syslet_add_events(struct thread_data *td, unsigned int nr)
{
struct syslet_data *sd = td->io_ops->data;
+ unsigned int i, uidx;
- do {
- struct syslet_uatom *atom;
+ uidx = sd->ring->user_tail;
+ read_barrier();
- atom = sd->ring[sd->ahu.user_ring_idx];
- if (!atom)
- break;
+ for (i = 0; i < nr; i++) {
/* mask the running index so it wraps around the ring */
+ unsigned int idx = (i + uidx) & sd->ring_mask;
+ struct syslet_completion *comp = &sd->ring->comp[idx];
/*
 * presumably the (long) io_u value that fio_syslet_queue() passed to
 * fill_syslet_args() -- TODO confirm against the syslet ABI
 */
+ struct io_u *io_u = (struct io_u *) (long) comp->caller_data;
+ long ret;
- sd->ring[sd->ahu.user_ring_idx] = NULL;
- if (++sd->ahu.user_ring_idx == td->iodepth)
- sd->ahu.user_ring_idx = 0;
+ ret = comp->status;
/*
 * status <= 0: nothing was transferred, so the whole buffer is
 * residual and the negated status is stored as the error.
 * status > 0: that many bytes were transferred.
 */
+ if (ret <= 0) {
+ io_u->resid = io_u->xfer_buflen;
+ io_u->error = -ret;
+ } else {
+ io_u->resid = io_u->xfer_buflen - ret;
+ io_u->error = 0;
+ }
- fio_syslet_complete_atom(td, atom);
- } while (1);
+ fio_syslet_add_event(td, io_u);
+ }
}
-static int fio_syslet_getevents(struct thread_data *td, int min,
- int fio_unused max,
- struct timespec fio_unused *t)
/*
 * Collect at least one batch of completions from the ring.
 *
 * Each pass snapshots ring->kernel_head. If it differs from
 * ring->user_tail, the kh - user_tail entries in between are handed to
 * fio_syslet_add_events(), user_tail is advanced to the snapshot and the
 * function returns. Otherwise it issues the syslet_ring_wait syscall and
 * retries; a non-zero return from that syscall trips the assert.
 */
+static void fio_syslet_wait_for_events(struct thread_data *td)
{
struct syslet_data *sd = td->io_ops->data;
- long ret;
+ struct syslet_ring *ring = sd->ring;
do {
- fio_syslet_complete(td);
+ unsigned int kh = ring->kernel_head;
+ int ret;
/*
- * do we have enough immediate completions?
+ * first reap events that are already completed
*/
- if (sd->nr_events >= (unsigned int) min)
+ if (ring->user_tail != kh) {
+ unsigned int nr = kh - ring->user_tail;
+
+ fio_syslet_add_events(td, nr);
+ ring->user_tail = kh;
break;
+ }
/*
- * OK, we need to wait for some events...
+ * block waiting for at least one event
*/
- ret = async_wait(1, sd->ahu.user_ring_idx, &sd->ahu);
- if (ret < 0)
- return -errno;
+ ret = syscall(__NR_syslet_ring_wait, ring, ring->user_tail);
+ assert(!ret);
} while (1);
+}
+
+static int fio_syslet_getevents(struct thread_data *td, unsigned int min,
+ unsigned int fio_unused max,
+ struct timespec fio_unused *t)
+{
+ struct syslet_data *sd = td->io_ops->data;
+ long ret;
+
+ /*
+ * While we have fewer events than requested, block waiting for them
+ * (if we have to; there may already be more completed events ready
+ * for us - see fio_syslet_wait_for_events()).
+ */
+ while (sd->nr_events < min)
+ fio_syslet_wait_for_events(td);
ret = sd->nr_events;
sd->nr_events = 0;
return sd->events[event];
}
-static void init_atom(struct syslet_uatom *atom, int nr, void *arg0,
- void *arg1, void *arg2, void *arg3, void *ret_ptr,
- unsigned long flags, void *priv)
/*
 * Fill *regs with an fsync syscall on the file's descriptor.
 */
+static void fio_syslet_prep_sync(struct fio_file *f,
+ struct indirect_registers *regs)
{
- atom->flags = flags;
- atom->nr = nr;
- atom->ret_ptr = ret_ptr;
- atom->next = NULL;
- atom->arg_ptr[0] = arg0;
- atom->arg_ptr[1] = arg1;
- atom->arg_ptr[2] = arg2;
- atom->arg_ptr[3] = arg3;
- atom->arg_ptr[4] = atom->arg_ptr[5] = NULL;
- atom->private = priv;
+ FILL_IN(*regs, __NR_fsync, (long) f->fd);
}
-/*
- * Use seek atom for sync
- */
-static void fio_syslet_prep_sync(struct io_u *io_u, struct fio_file *f)
/*
 * Fill *regs with an fdatasync syscall on the file's descriptor.
 */
+static void fio_syslet_prep_datasync(struct fio_file *f,
+ struct indirect_registers *regs)
{
- init_atom(&io_u->req.atom, __NR_fsync, &f->fd, NULL, NULL, NULL,
- &io_u->req.ret, 0, io_u);
+ FILL_IN(*regs, __NR_fdatasync, (long) f->fd);
}
-static void fio_syslet_prep_rw(struct io_u *io_u, struct fio_file *f)
/*
 * Fill *regs with a positional read (DDIR_READ) or write (any other
 * direction) syscall. The arguments are the file descriptor, the io_u's
 * transfer buffer, its length and its offset, each passed as a long.
 */
+static void fio_syslet_prep_rw(struct io_u *io_u, struct fio_file *f,
+ struct indirect_registers *regs)
{
- int nr;
+ long nr;
/*
* prepare rw
*/
if (io_u->ddir == DDIR_READ)
- nr = __NR_pread64;
+ nr = __NR_fio_pread;
else
- nr = __NR_pwrite64;
+ nr = __NR_fio_pwrite;
- init_atom(&io_u->req.atom, nr, &f->fd, &io_u->xfer_buf,
- &io_u->xfer_buflen, &io_u->offset, &io_u->req.ret, 0, io_u);
+ FILL_IN(*regs, nr, (long) f->fd, (long) io_u->xfer_buf,
+ (long) io_u->xfer_buflen, (long) io_u->offset);
}
-static int fio_syslet_prep(struct thread_data fio_unused *td, struct io_u *io_u)
/*
 * Fill *regs for the given io_u by dispatching on io_u->ddir:
 * DDIR_SYNC -> fsync, DDIR_DATASYNC -> fdatasync, anything else is
 * treated as a read/write.
 */
+static void fio_syslet_prep(struct io_u *io_u, struct indirect_registers *regs)
{
struct fio_file *f = io_u->file;
if (io_u->ddir == DDIR_SYNC)
- fio_syslet_prep_sync(io_u, f);
+ fio_syslet_prep_sync(f, regs);
+ else if (io_u->ddir == DDIR_DATASYNC)
+ fio_syslet_prep_datasync(f, regs);
else
- fio_syslet_prep_rw(io_u, f);
-
- return 0;
+ fio_syslet_prep_rw(io_u, f, regs);
}
-static void cachemiss_thread_start(void)
-{
- while (1)
- async_thread(NULL, NULL);
-}
-
-#define THREAD_STACK_SIZE (16384)
-
-static unsigned long thread_stack_alloc()
/*
 * Function handed to fill_syslet_args() together with the engine's stack;
 * all it does is issue the exit syscall.
 * NOTE(review): no exit status argument is passed to the syscall --
 * confirm that is intended.
 */
+static void ret_func(void)
{
- return (unsigned long) malloc(THREAD_STACK_SIZE) + THREAD_STACK_SIZE;
+ syscall(__NR_exit);
}
-static void fio_syslet_queued(struct thread_data *td, struct syslet_data *sd)
-{
- struct syslet_uatom *atom;
- struct timeval now;
-
- fio_gettime(&now, NULL);
-
- atom = sd->head;
- while (atom) {
- struct io_u *io_u = atom->private;
-
- memcpy(&io_u->issue_time, &now, sizeof(now));
- io_u_queued(td, io_u);
- atom = atom->next;
- }
-}
-
-static int fio_syslet_commit(struct thread_data *td)
+/*
+ * Submit one io_u through the indirect syscall.
+ *
+ * Returns FIO_Q_COMPLETED when the call finished synchronously (fully,
+ * short, or with an error recorded in io_u->error), or FIO_Q_QUEUED when
+ * the kernel reports ESYSLETPENDING, i.e. the request continues
+ * asynchronously and will show up in the completion ring.
+ */
+static int fio_syslet_queue(struct thread_data *td, struct io_u *io_u)
{
struct syslet_data *sd = td->io_ops->data;
- struct syslet_uatom *done;
-
- if (!sd->head)
- return 0;
-
- assert(!sd->tail->next);
+ union indirect_params params;
+ struct indirect_registers regs;
+ int ret;
- if (!sd->ahu.new_thread_stack)
- sd->ahu.new_thread_stack = thread_stack_alloc();
+ fio_ro_check(td, io_u);
- fio_syslet_queued(td, sd);
+ memset(&params, 0, sizeof(params));
+ fill_syslet_args(&params.syslet, sd->ring, (long)io_u, ret_func, sd->stack);
- /*
- * On sync completion, the atom is returned. So on NULL return
- * it's queued asynchronously.
- */
- done = async_exec(sd->head, &sd->ahu);
-
- sd->head = sd->tail = NULL;
-
- if (done)
- fio_syslet_complete_atom(td, done);
-
- return 0;
-}
+ fio_syslet_prep(io_u, &regs);
-static int fio_syslet_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syslet_data *sd = td->io_ops->data;
-
- if (sd->tail) {
- sd->tail->next = &io_u->req.atom;
- sd->tail = &io_u->req.atom;
- } else
- sd->head = sd->tail = &io_u->req.atom;
+ ret = syscall(__NR_indirect, &regs, &params, sizeof(params), 0);
+ if (ret == (int) io_u->xfer_buflen) {
+ /*
+ * completed sync, account. this also catches fsync().
+ */
+ return FIO_Q_COMPLETED;
+ } else if (ret >= 0) {
+ /*
+ * short sync completion: errno is not set by a successful
+ * call, so record the residual the same way the ring
+ * completion path does instead of reporting a stale errno
+ */
+ io_u->resid = io_u->xfer_buflen - ret;
+ io_u->error = 0;
+ return FIO_Q_COMPLETED;
+ } else if (errno == ESYSLETPENDING) {
+ /*
+ * queued for async execution
+ */
+ return FIO_Q_QUEUED;
+ }
- io_u->req.head = sd->head;
- return FIO_Q_QUEUED;
+ /*
+ * real failure: ret < 0 and errno holds the cause
+ */
+ io_u->error = errno;
+ td_verror(td, io_u->error, "xfer");
+ return FIO_Q_COMPLETED;
}
-static int async_head_init(struct syslet_data *sd, unsigned int depth)
+/*
+ * Probe whether the indirect/syslet syscall works by issuing getpid
+ * through it and comparing the result with a regular getpid().
+ *
+ * Returns 0 when the two pids match (syslets usable), 1 otherwise.
+ */
+static int check_syslet_support(struct syslet_data *sd)
{
- unsigned long ring_size;
-
- memset(&sd->ahu, 0, sizeof(struct async_head_user));
+ union indirect_params params;
+ struct indirect_registers regs;
+ pid_t pid, my_pid = getpid();
- ring_size = sizeof(struct syslet_uatom *) * depth;
- sd->ring = malloc(ring_size);
- memset(sd->ring, 0, ring_size);
+ memset(&params, 0, sizeof(params));
+ fill_syslet_args(&params.syslet, sd->ring, 0, ret_func, sd->stack);
- sd->ahu.user_ring_idx = 0;
- sd->ahu.completion_ring = sd->ring;
- sd->ahu.ring_size_bytes = ring_size;
- sd->ahu.head_stack = thread_stack_alloc();
- sd->ahu.head_eip = (unsigned long) cachemiss_thread_start;
- sd->ahu.new_thread_eip = (unsigned long) cachemiss_thread_start;
+ FILL_IN(regs, __NR_getpid);
- return 0;
-}
+ pid = syscall(__NR_indirect, &regs, &params, sizeof(params), 0);
+ if (pid == my_pid)
+ return 0;
-static void async_head_exit(struct syslet_data *sd)
-{
- free(sd->ring);
+ return 1;
}
static void fio_syslet_cleanup(struct thread_data *td)
struct syslet_data *sd = td->io_ops->data;
if (sd) {
- async_head_exit(sd);
free(sd->events);
+ free(sd->ring);
free(sd);
- td->io_ops->data = NULL;
}
}
static int fio_syslet_init(struct thread_data *td)
{
struct syslet_data *sd;
-
+ void *ring = NULL, *stack = NULL;
+ unsigned int ring_size, ring_nr;
sd = malloc(sizeof(*sd));
memset(sd, 0, sizeof(*sd));
- sd->events = malloc(sizeof(struct io_u *) * td->iodepth);
- memset(sd->events, 0, sizeof(struct io_u *) * td->iodepth);
+
+ sd->events = malloc(sizeof(struct io_u *) * td->o.iodepth);
+ memset(sd->events, 0, sizeof(struct io_u *) * td->o.iodepth);
/*
- * This will handily fail for kernels where syslet isn't available
+ * The ring needs to be a power-of-2, so round it up if we have to
*/
- if (async_head_init(sd, td->iodepth)) {
- free(sd->events);
- free(sd);
- return 1;
+ ring_nr = td->o.iodepth;
+ if (ring_nr & (ring_nr - 1))
+ ring_nr = 1 << __fls(ring_nr);
+
+ ring_size = sizeof(struct syslet_ring) +
+ ring_nr * sizeof(struct syslet_completion);
+ if (posix_memalign(&ring, sizeof(uint64_t), ring_size))
+ goto err_mem;
+ if (posix_memalign(&stack, page_size, page_size))
+ goto err_mem;
+
+ sd->ring = ring;
+ sd->ring_mask = ring_nr - 1;
+ sd->stack = stack;
+
+ memset(sd->ring, 0, ring_size);
+ sd->ring->elements = ring_nr;
+
+ if (!check_syslet_support(sd)) {
+ td->io_ops->data = sd;
+ return 0;
}
- td->io_ops->data = sd;
- return 0;
+ log_err("fio: syslets do not appear to work\n");
+err_mem:
+ free(sd->events);
+ if (ring)
+ free(ring);
+ if (stack)
+ free(stack);
+ free(sd);
+ return 1;
}
/*
 * Operations table for the "syslet-rw" engine. It wires the engine's
 * init/queue/getevents/event/cleanup functions and uses the generic
 * helpers for opening, closing and sizing files.
 */
static struct ioengine_ops ioengine = {
.name = "syslet-rw",
.version = FIO_IOOPS_VERSION,
.init = fio_syslet_init,
- .prep = fio_syslet_prep,
.queue = fio_syslet_queue,
- .commit = fio_syslet_commit,
.getevents = fio_syslet_getevents,
.event = fio_syslet_event,
.cleanup = fio_syslet_cleanup,
.open_file = generic_open_file,
.close_file = generic_close_file,
+ .get_file_size = generic_get_file_size,
};
#else /* FIO_HAVE_SYSLET */
*/
static int fio_syslet_init(struct thread_data fio_unused *td)
{
- fprintf(stderr, "fio: syslet not available\n");
+ log_err("fio: syslet not available\n");
return 1;
}