From 2866c82d598e30604d8a92723c664ee6ced90fb0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Oct 2006 15:57:48 +0200 Subject: [PATCH] [PATCH] Separate io engines into separate loadable objects Signed-off-by: Jens Axboe --- Makefile | 8 +- engines/Makefile | 20 + engines/fio-engine-cpu.c | 23 + engines/fio-engine-libaio.c | 131 +++++ engines/fio-engine-mmap.c | 101 ++++ engines/fio-engine-posixaio.c | 188 +++++++ engines/fio-engine-sg.c | 324 ++++++++++++ engines/fio-engine-splice.c | 193 +++++++ engines/fio-engine-sync.c | 108 ++++ fio.c | 66 +-- fio.h | 47 +- init.c | 42 +- ioengines.c | 926 +--------------------------------- os.h | 9 - 14 files changed, 1178 insertions(+), 1008 deletions(-) create mode 100644 engines/Makefile create mode 100644 engines/fio-engine-cpu.c create mode 100644 engines/fio-engine-libaio.c create mode 100644 engines/fio-engine-mmap.c create mode 100644 engines/fio-engine-posixaio.c create mode 100644 engines/fio-engine-sg.c create mode 100644 engines/fio-engine-splice.c create mode 100644 engines/fio-engine-sync.c diff --git a/Makefile b/Makefile index 684e8e04..5335e145 100644 --- a/Makefile +++ b/Makefile @@ -4,12 +4,13 @@ PROGS = fio SCRIPTS = fio_generate_plots all: depend $(PROGS) $(SCRIPTS) + $(MAKE) -C engines fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o - $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt + $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt -ldl clean: - -rm -f *.o .depend cscope.out $(PROGS) + -rm -f *.o .depend cscope.out $(PROGS) engines/*.o depend: @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend @@ -20,10 +21,13 @@ cscope: INSTALL = install prefix = /usr/local bindir = $(prefix)/bin +libdir = $(prefix)/lib/fio install: $(PROGS) $(SCRIPTS) $(INSTALL) -m755 -d $(DESTDIR)$(bindir) $(INSTALL) $(PROGS) $(SCRIPTS) $(DESTDIR)$(bindir) + $(INSTALL) -m755 -d $(DESTDIR) $(libdir) + $(INSTALL) engines/*.o $(libdir) ifneq ($(wildcard .depend),) include .depend diff --git a/engines/Makefile b/engines/Makefile new file mode 100644 index 00000000..2d8de7ab --- /dev/null +++ b/engines/Makefile @@ -0,0 +1,20 @@ +CC = gcc +CFLAGS = -Wall -O2 -W -shared -rdynamic -fPIC +ALL_CFLAGS = $(CFLAGS) -I.. -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 +LIBS = +OBJS = fio-engine-sync.o fio-engine-splice.o fio-engine-mmap.o fio-engine-libaio.o fio-engine-posixaio.o fio-engine-sg.o fio-engine-cpu.o + +all: depend $(OBJS) + +depend: + @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend + +clean: + -rm -f *.o $(OBJS) .depend + +%.o: %.c + $(CC) $(ALL_CFLAGS) -o $*.o $< + +ifneq ($(wildcard .depend),) +include .depend +endif diff --git a/engines/fio-engine-cpu.c b/engines/fio-engine-cpu.c new file mode 100644 index 00000000..6d6fc562 --- /dev/null +++ b/engines/fio-engine-cpu.c @@ -0,0 +1,23 @@ +#include "fio.h" +#include "os.h" + +static int fio_cpuio_init(struct thread_data *td) +{ + if (!td->cpuload) { + td_vmsg(td, EINVAL, "cpu thread needs rate"); + return 1; + } else if (td->cpuload > 100) + td->cpuload = 100; + + td->read_iolog = td->write_iolog = 0; + td->fd = -1; + + return 0; +} + +struct ioengine_ops ioengine = { + .name = "cpuio", + .version = FIO_IOOPS_VERSION, + .init = fio_cpuio_init, + .flags = FIO_CPUIO, +}; diff --git a/engines/fio-engine-libaio.c b/engines/fio-engine-libaio.c new file mode 100644 index 00000000..703808b6 --- /dev/null +++ b/engines/fio-engine-libaio.c @@ -0,0 +1,131 @@ +/* + * native linux aio io engine + * + */ +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj) + +struct libaio_data { + io_context_t aio_ctx; + struct io_event *aio_events; +}; + +static int fio_libaio_sync(struct thread_data *td) +{ + return fsync(td->fd); +} + +static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u) +{ + if (io_u->ddir == DDIR_READ) + io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); + else + io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); + + return 0; +} + +static struct io_u *fio_libaio_event(struct thread_data *td, int event) +{ + struct libaio_data *ld = td->io_ops->data; + + return ev_to_iou(ld->aio_events + event); +} + +static int fio_libaio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct libaio_data *ld = td->io_ops->data; + long r; + + do { + r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t); + if (r == -EAGAIN) { + usleep(100); + continue; + } else if (r == -EINTR) + continue; + else + break; + } while (1); + + return (int) r; +} + +static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct libaio_data *ld = td->io_ops->data; + struct iocb *iocb = &io_u->iocb; + long ret; + + do { + ret = io_submit(ld->aio_ctx, 1, &iocb); + if (ret == 1) + return 0; + else if (ret == -EAGAIN) + usleep(100); + else if (ret == -EINTR) + continue; + else + break; + } while (1); + + return (int) ret; + +} + +static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u) +{ + struct libaio_data *ld = td->io_ops->data; + + return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events); +} + +static void fio_libaio_cleanup(struct thread_data *td) +{ + struct libaio_data *ld = td->io_ops->data; + + if (ld) { + io_destroy(ld->aio_ctx); + if (ld->aio_events) + free(ld->aio_events); + + free(ld); + td->io_ops->data = NULL; + } +} + +static int fio_libaio_init(struct thread_data *td) +{ + struct libaio_data *ld = malloc(sizeof(*ld)); + + memset(ld, 0, sizeof(*ld)); + if (io_queue_init(td->iodepth, &ld->aio_ctx)) { + td_verror(td, errno); + return 1; + } + + ld->aio_events = malloc(td->iodepth * sizeof(struct io_event)); + td->io_ops->data = ld; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "libaio", + .version = FIO_IOOPS_VERSION, + .init = fio_libaio_init, + .prep = fio_libaio_prep, + .queue = fio_libaio_queue, + .cancel = fio_libaio_cancel, + .getevents = fio_libaio_getevents, + .event = fio_libaio_event, + .cleanup = fio_libaio_cleanup, + .sync = fio_libaio_sync, +}; diff --git a/engines/fio-engine-mmap.c b/engines/fio-engine-mmap.c new file mode 100644 index 00000000..abb42bf1 --- /dev/null +++ b/engines/fio-engine-mmap.c @@ -0,0 +1,101 @@ +/* + * regular read/write sync io engine + * + */ +#include +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +struct mmapio_data { + struct io_u *last_io_u; +}; + +static int fio_mmapio_getevents(struct thread_data *td, int fio_unused min, + int max, struct timespec fio_unused *t) +{ + assert(max <= 1); + + /* + * we can only have one finished io_u for sync io, since the depth + * is always 1 + */ + if (list_empty(&td->io_u_busylist)) + return 0; + + return 1; +} + +static struct io_u *fio_mmapio_event(struct thread_data *td, int event) +{ + struct mmapio_data *sd = td->io_ops->data; + + assert(event == 0); + + return sd->last_io_u; +} + + +static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) +{ + unsigned long long real_off = io_u->offset - td->file_offset; + struct mmapio_data *sd = td->io_ops->data; + + if (io_u->ddir == DDIR_READ) + memcpy(io_u->buf, td->mmap + real_off, io_u->buflen); + else + memcpy(td->mmap + real_off, io_u->buf, io_u->buflen); + + /* + * not really direct, but should drop the pages from the cache + */ + if (td->odirect) { + if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0) + io_u->error = errno; + if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0) + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + +static int fio_mmapio_sync(struct thread_data *td) +{ + return msync(td->mmap, td->file_size, MS_SYNC); +} + +static void fio_mmapio_cleanup(struct thread_data *td) +{ + if (td->io_ops->data) { + free(td->io_ops->data); + td->io_ops->data = NULL; + } +} + +static int fio_mmapio_init(struct thread_data *td) +{ + struct mmapio_data *sd = malloc(sizeof(*sd)); + + sd->last_io_u = NULL; + td->io_ops->data = sd; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "mmap", + .version = FIO_IOOPS_VERSION, + .init = fio_mmapio_init, + .queue = fio_mmapio_queue, + .getevents = fio_mmapio_getevents, + .event = fio_mmapio_event, + .cleanup = fio_mmapio_cleanup, + .sync = fio_mmapio_sync, + .flags = FIO_SYNCIO, +}; diff --git a/engines/fio-engine-posixaio.c b/engines/fio-engine-posixaio.c new file mode 100644 index 00000000..871db77c --- /dev/null +++ b/engines/fio-engine-posixaio.c @@ -0,0 +1,188 @@ +/* + * posix aio io engine + * + */ +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +struct posixaio_data { + struct io_u **aio_events; +}; + +static int fill_timespec(struct timespec *ts) +{ +#ifdef _POSIX_TIMERS + if (!clock_gettime(CLOCK_MONOTONIC, ts)) + return 0; + + perror("clock_gettime"); +#endif + return 1; +} + +static unsigned long long ts_utime_since_now(struct timespec *t) +{ + long long sec, nsec; + struct timespec now; + + if (fill_timespec(&now)) + return 0; + + sec = now.tv_sec - t->tv_sec; + nsec = now.tv_nsec - t->tv_nsec; + if (sec > 0 && nsec < 0) { + sec--; + nsec += 1000000000; + } + + sec *= 1000000; + nsec /= 1000; + return sec + nsec; +} + +static int fio_posixaio_sync(struct thread_data *td) +{ + return fsync(td->fd); +} + +static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u) +{ + int r = aio_cancel(td->fd, &io_u->aiocb); + + if (r == 1 || r == AIO_CANCELED) + return 0; + + return 1; +} + +static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u) +{ + struct aiocb *aiocb = &io_u->aiocb; + + aiocb->aio_fildes = td->fd; + aiocb->aio_buf = io_u->buf; + aiocb->aio_nbytes = io_u->buflen; + aiocb->aio_offset = io_u->offset; + + io_u->seen = 0; + return 0; +} + +static int fio_posixaio_getevents(struct thread_data *td, int min, int max, + struct timespec *t) +{ + struct posixaio_data *pd = td->io_ops->data; + struct list_head *entry; + struct timespec start; + int r, have_timeout = 0; + + if (t && !fill_timespec(&start)) + have_timeout = 1; + + r = 0; +restart: + list_for_each(entry, &td->io_u_busylist) { + struct io_u *io_u = list_entry(entry, struct io_u, list); + int err; + + if (io_u->seen) + continue; + + err = aio_error(&io_u->aiocb); + switch (err) { + default: + io_u->error = err; + case ECANCELED: + case 0: + pd->aio_events[r++] = io_u; + io_u->seen = 1; + break; + case EINPROGRESS: + break; + } + + if (r >= max) + break; + } + + if (r >= min) + return r; + + if (have_timeout) { + unsigned long long usec; + + usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); + if (ts_utime_since_now(&start) > usec) + return r; + } + + /* + * hrmpf, we need to wait for more. we should use aio_suspend, for + * now just sleep a little and recheck status of busy-and-not-seen + */ + usleep(1000); + goto restart; +} + +static struct io_u *fio_posixaio_event(struct thread_data *td, int event) +{ + struct posixaio_data *pd = td->io_ops->data; + + return pd->aio_events[event]; +} + +static int fio_posixaio_queue(struct thread_data fio_unused *td, + struct io_u *io_u) +{ + struct aiocb *aiocb = &io_u->aiocb; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = aio_read(aiocb); + else + ret = aio_write(aiocb); + + if (ret) + io_u->error = errno; + + return io_u->error; +} + +static void fio_posixaio_cleanup(struct thread_data *td) +{ + struct posixaio_data *pd = td->io_ops->data; + + if (pd) { + free(pd->aio_events); + free(pd); + td->io_ops->data = NULL; + } +} + +static int fio_posixaio_init(struct thread_data *td) +{ + struct posixaio_data *pd = malloc(sizeof(*pd)); + + pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *)); + + td->io_ops->data = pd; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "posixaio", + .version = FIO_IOOPS_VERSION, + .init = fio_posixaio_init, + .prep = fio_posixaio_prep, + .queue = fio_posixaio_queue, + .cancel = fio_posixaio_cancel, + .getevents = fio_posixaio_getevents, + .event = fio_posixaio_event, + .cleanup = fio_posixaio_cleanup, + .sync = fio_posixaio_sync, +}; diff --git a/engines/fio-engine-sg.c b/engines/fio-engine-sg.c new file mode 100644 index 00000000..59eea1df --- /dev/null +++ b/engines/fio-engine-sg.c @@ -0,0 +1,324 @@ +/* + * scsi generic sg v3 io engine + * + */ +#include +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +struct sgio_cmd { + unsigned char cdb[10]; + int nr; +}; + +struct sgio_data { + struct sgio_cmd *cmds; + struct io_u **events; + unsigned int bs; +}; + +static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, + struct io_u *io_u, int fs) +{ + struct sgio_cmd *sc = &sd->cmds[io_u->index]; + + memset(hdr, 0, sizeof(*hdr)); + memset(sc->cdb, 0, sizeof(sc->cdb)); + + hdr->interface_id = 'S'; + hdr->cmdp = sc->cdb; + hdr->cmd_len = sizeof(sc->cdb); + hdr->pack_id = io_u->index; + hdr->usr_ptr = io_u; + + if (fs) { + hdr->dxferp = io_u->buf; + hdr->dxfer_len = io_u->buflen; + } +} + +static int fio_sgio_ioctl_getevents(struct thread_data *td, int fio_unused min, + int max, struct timespec fio_unused *t) +{ + assert(max <= 1); + + /* + * we can only have one finished io_u for sync io, since the depth + * is always 1 + */ + if (list_empty(&td->io_u_busylist)) + return 0; + + return 1; +} + + +static int fio_sgio_getevents(struct thread_data *td, int min, int max, + struct timespec fio_unused *t) +{ + struct sgio_data *sd = td->io_ops->data; + struct pollfd pfd = { .fd = td->fd, .events = POLLIN }; + void *buf = malloc(max * sizeof(struct sg_io_hdr)); + int left = max, ret, events, i, r = 0, fl = 0; + + /* + * don't block for !events + */ + if (!min) { + fl = fcntl(td->fd, F_GETFL); + fcntl(td->fd, F_SETFL, fl | O_NONBLOCK); + } + + while (left) { + do { + if (!min) + break; + poll(&pfd, 1, -1); + if (pfd.revents & POLLIN) + break; + } while (1); + + ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr)); + if (ret < 0) { + if (errno == EAGAIN) + break; + td_verror(td, errno); + r = -1; + break; + } else if (!ret) + break; + + events = ret / sizeof(struct sg_io_hdr); + left -= events; + r += events; + + for (i = 0; i < events; i++) { + struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; + + sd->events[i] = hdr->usr_ptr; + } + } + + if (!min) + fcntl(td->fd, F_SETFL, fl); + + free(buf); + return r; +} + +static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u) +{ + struct sgio_data *sd = td->io_ops->data; + struct sg_io_hdr *hdr = &io_u->hdr; + + sd->events[0] = io_u; + + return ioctl(td->fd, SG_IO, hdr); +} + +static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + int ret; + + ret = write(td->fd, hdr, sizeof(*hdr)); + if (ret < 0) + return errno; + + if (sync) { + ret = read(td->fd, hdr, sizeof(*hdr)); + if (ret < 0) + return errno; + } + + return 0; +} + +static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync) +{ + if (td->filetype == FIO_TYPE_BD) + return fio_sgio_ioctl_doio(td, io_u); + + return fio_sgio_rw_doio(td, io_u, sync); +} + +static int fio_sgio_sync(struct thread_data *td) +{ + struct sgio_data *sd = td->io_ops->data; + struct sg_io_hdr *hdr; + struct io_u *io_u; + int ret; + + io_u = __get_io_u(td); + if (!io_u) + return ENOMEM; + + hdr = &io_u->hdr; + sgio_hdr_init(sd, hdr, io_u, 0); + hdr->dxfer_direction = SG_DXFER_NONE; + + hdr->cmdp[0] = 0x35; + + ret = fio_sgio_doio(td, io_u, 1); + put_io_u(td, io_u); + return ret; +} + +static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + struct sgio_data *sd = td->io_ops->data; + int nr_blocks, lba; + + if (io_u->buflen & (sd->bs - 1)) { + log_err("read/write not sector aligned\n"); + return EINVAL; + } + + sgio_hdr_init(sd, hdr, io_u, 1); + + if (io_u->ddir == DDIR_READ) { + hdr->dxfer_direction = SG_DXFER_FROM_DEV; + hdr->cmdp[0] = 0x28; + } else { + hdr->dxfer_direction = SG_DXFER_TO_DEV; + hdr->cmdp[0] = 0x2a; + } + + nr_blocks = io_u->buflen / sd->bs; + lba = io_u->offset / sd->bs; + hdr->cmdp[2] = (lba >> 24) & 0xff; + hdr->cmdp[3] = (lba >> 16) & 0xff; + hdr->cmdp[4] = (lba >> 8) & 0xff; + hdr->cmdp[5] = lba & 0xff; + hdr->cmdp[7] = (nr_blocks >> 8) & 0xff; + hdr->cmdp[8] = nr_blocks & 0xff; + return 0; +} + +static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct sg_io_hdr *hdr = &io_u->hdr; + int ret; + + ret = fio_sgio_doio(td, io_u, 0); + + if (ret < 0) + io_u->error = errno; + else if (hdr->status) { + io_u->resid = hdr->resid; + io_u->error = EIO; + } + + return io_u->error; +} + +static struct io_u *fio_sgio_event(struct thread_data *td, int event) +{ + struct sgio_data *sd = td->io_ops->data; + + return sd->events[event]; +} + +static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs) +{ + struct sgio_data *sd = td->io_ops->data; + struct io_u *io_u; + struct sg_io_hdr *hdr; + unsigned char buf[8]; + int ret; + + io_u = __get_io_u(td); + assert(io_u); + + hdr = &io_u->hdr; + sgio_hdr_init(sd, hdr, io_u, 0); + memset(buf, 0, sizeof(buf)); + + hdr->cmdp[0] = 0x25; + hdr->dxfer_direction = SG_DXFER_FROM_DEV; + hdr->dxferp = buf; + hdr->dxfer_len = sizeof(buf); + + ret = fio_sgio_doio(td, io_u, 1); + if (ret) { + put_io_u(td, io_u); + return ret; + } + + *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; + put_io_u(td, io_u); + return 0; +} + +static void fio_sgio_cleanup(struct thread_data *td) +{ + if (td->io_ops->data) { + free(td->io_ops->data); + td->io_ops->data = NULL; + } +} + +static int fio_sgio_init(struct thread_data *td) +{ + struct sgio_data *sd; + unsigned int bs; + int ret; + + sd = malloc(sizeof(*sd)); + sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd)); + sd->events = malloc(td->iodepth * sizeof(struct io_u *)); + td->io_ops->data = sd; + + if (td->filetype == FIO_TYPE_BD) { + if (ioctl(td->fd, BLKSSZGET, &bs) < 0) { + td_verror(td, errno); + return 1; + } + } else if (td->filetype == FIO_TYPE_CHAR) { + int version; + + if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) { + td_verror(td, errno); + return 1; + } + + ret = fio_sgio_get_bs(td, &bs); + if (ret) + return ret; + } else { + log_err("ioengine sgio only works on block devices\n"); + return 1; + } + + sd->bs = bs; + + if (td->filetype == FIO_TYPE_BD) + td->io_ops->getevents = fio_sgio_ioctl_getevents; + else + td->io_ops->getevents = fio_sgio_getevents; + + /* + * we want to do it, regardless of whether odirect is set or not + */ + td->override_sync = 1; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "sg", + .version = FIO_IOOPS_VERSION, + .init = fio_sgio_init, + .prep = fio_sgio_prep, + .queue = fio_sgio_queue, + .getevents = fio_sgio_getevents, + .event = fio_sgio_event, + .cleanup = fio_sgio_cleanup, + .sync = fio_sgio_sync, + .flags = FIO_SYNCIO, +}; diff --git a/engines/fio-engine-splice.c b/engines/fio-engine-splice.c new file mode 100644 index 00000000..30984f16 --- /dev/null +++ b/engines/fio-engine-splice.c @@ -0,0 +1,193 @@ +/* + * splice io engine + * + */ +#include +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +struct spliceio_data { + struct io_u *last_io_u; + int pipe[2]; +}; + +static int fio_spliceio_sync(struct thread_data *td) +{ + return fsync(td->fd); +} + +static int fio_spliceio_getevents(struct thread_data *td, int fio_unused min, + int max, struct timespec fio_unused *t) +{ + assert(max <= 1); + + /* + * we can only have one finished io_u for sync io, since the depth + * is always 1 + */ + if (list_empty(&td->io_u_busylist)) + return 0; + + return 1; +} + +static struct io_u *fio_spliceio_event(struct thread_data *td, int event) +{ + struct spliceio_data *sd = td->io_ops->data; + + assert(event == 0); + + return sd->last_io_u; +} + +/* + * For splice reading, we unfortunately cannot (yet) vmsplice the other way. + * So just splice the data from the file into the pipe, and use regular + * read to fill the buffer. Doesn't make a lot of sense, but... + */ +static int fio_splice_read(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_ops->data; + int ret, ret2, buflen; + off_t offset; + void *p; + + offset = io_u->offset; + buflen = io_u->buflen; + p = io_u->buf; + while (buflen) { + int this_len = buflen; + + if (this_len > SPLICE_DEF_SIZE) + this_len = SPLICE_DEF_SIZE; + + ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); + if (ret < 0) { + if (errno == ENODATA || errno == EAGAIN) + continue; + + return errno; + } + + buflen -= ret; + + while (ret) { + ret2 = read(sd->pipe[0], p, ret); + if (ret2 < 0) + return errno; + + ret -= ret2; + p += ret2; + } + } + + return io_u->buflen; +} + +/* + * For splice writing, we can vmsplice our data buffer directly into a + * pipe and then splice that to a file. + */ +static int fio_splice_write(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_ops->data; + struct iovec iov[1] = { + { + .iov_base = io_u->buf, + .iov_len = io_u->buflen, + } + }; + struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; + off_t off = io_u->offset; + int ret, ret2; + + while (iov[0].iov_len) { + if (poll(&pfd, 1, -1) < 0) + return errno; + + ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK); + if (ret < 0) + return errno; + + iov[0].iov_len -= ret; + iov[0].iov_base += ret; + + while (ret) { + ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0); + if (ret2 < 0) + return errno; + + ret -= ret2; + } + } + + return io_u->buflen; +} + +static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct spliceio_data *sd = td->io_ops->data; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = fio_splice_read(td, io_u); + else + ret = fio_splice_write(td, io_u); + + if ((unsigned int) ret != io_u->buflen) { + if (ret > 0) { + io_u->resid = io_u->buflen - ret; + io_u->error = ENODATA; + } else + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + +static void fio_spliceio_cleanup(struct thread_data *td) +{ + struct spliceio_data *sd = td->io_ops->data; + + if (sd) { + close(sd->pipe[0]); + close(sd->pipe[1]); + free(sd); + td->io_ops->data = NULL; + } +} + +static int fio_spliceio_init(struct thread_data *td) +{ + struct spliceio_data *sd = malloc(sizeof(*sd)); + + sd->last_io_u = NULL; + if (pipe(sd->pipe) < 0) { + td_verror(td, errno); + free(sd); + return 1; + } + + td->io_ops->data = sd; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "splice", + .version = FIO_IOOPS_VERSION, + .init = fio_spliceio_init, + .queue = fio_spliceio_queue, + .getevents = fio_spliceio_getevents, + .event = fio_spliceio_event, + .cleanup = fio_spliceio_cleanup, + .sync = fio_spliceio_sync, + .flags = FIO_SYNCIO, +}; diff --git a/engines/fio-engine-sync.c b/engines/fio-engine-sync.c new file mode 100644 index 00000000..abc29f40 --- /dev/null +++ b/engines/fio-engine-sync.c @@ -0,0 +1,108 @@ +/* + * regular read/write sync io engine + * + */ +#include +#include +#include +#include +#include +#include "fio.h" +#include "os.h" + +struct syncio_data { + struct io_u *last_io_u; +}; + +static int fio_syncio_sync(struct thread_data *td) +{ + return fsync(td->fd); +} + +static int fio_syncio_getevents(struct thread_data *td, int fio_unused min, + int max, struct timespec fio_unused *t) +{ + assert(max <= 1); + + /* + * we can only have one finished io_u for sync io, since the depth + * is always 1 + */ + if (list_empty(&td->io_u_busylist)) + return 0; + + return 1; +} + +static struct io_u *fio_syncio_event(struct thread_data *td, int event) +{ + struct syncio_data *sd = td->io_ops->data; + + assert(event == 0); + + return sd->last_io_u; +} + +static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) +{ + if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) { + td_verror(td, errno); + return 1; + } + + return 0; +} + +static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) +{ + struct syncio_data *sd = td->io_ops->data; + int ret; + + if (io_u->ddir == DDIR_READ) + ret = read(td->fd, io_u->buf, io_u->buflen); + else + ret = write(td->fd, io_u->buf, io_u->buflen); + + if ((unsigned int) ret != io_u->buflen) { + if (ret > 0) { + io_u->resid = io_u->buflen - ret; + io_u->error = EIO; + } else + io_u->error = errno; + } + + if (!io_u->error) + sd->last_io_u = io_u; + + return io_u->error; +} + +static void fio_syncio_cleanup(struct thread_data *td) +{ + if (td->io_ops->data) { + free(td->io_ops->data); + td->io_ops->data = NULL; + } +} + +static int fio_syncio_init(struct thread_data *td) +{ + struct syncio_data *sd = malloc(sizeof(*sd)); + + sd->last_io_u = NULL; + td->io_ops->data = sd; + return 0; +} + +struct ioengine_ops ioengine = { + .name = "sync", + .version = FIO_IOOPS_VERSION, + .init = fio_syncio_init, + .prep = fio_syncio_prep, + .queue = fio_syncio_queue, + .getevents = fio_syncio_getevents, + .event = fio_syncio_event, + .cleanup = fio_syncio_cleanup, + .sync = fio_syncio_sync, + .flags = FIO_SYNCIO, +}; diff --git a/fio.c b/fio.c index 445babd5..a2651f03 100644 --- a/fio.c +++ b/fio.c @@ -434,7 +434,7 @@ static void populate_io_u(struct thread_data *td, struct io_u *io_u) static int td_io_prep(struct thread_data *td, struct io_u *io_u) { - if (td->io_prep && td->io_prep(td, io_u)) + if (td->io_ops->prep && td->io_ops->prep(td, io_u)) return 1; return 0; @@ -569,8 +569,8 @@ static int get_next_verify(struct thread_data *td, struct io_u *io_u) static int sync_td(struct thread_data *td) { - if (td->io_sync) - return td->io_sync(td); + if (td->io_ops->sync) + return td->io_ops->sync(td); return 0; } @@ -578,14 +578,14 @@ static int sync_td(struct thread_data *td) static int io_u_getevents(struct thread_data *td, int min, int max, struct timespec *t) { - return td->io_getevents(td, min, max, t); + return td->io_ops->getevents(td, min, max, t); } static int io_u_queue(struct thread_data *td, struct io_u *io_u) { gettimeofday(&io_u->issue_time, NULL); - return td->io_queue(td, io_u); + return td->io_ops->queue(td, io_u); } #define iocb_time(iocb) ((unsigned long) (iocb)->data) @@ -629,7 +629,7 @@ static void ios_completed(struct thread_data *td,struct io_completion_data *icd) icd->bytes_done[0] = icd->bytes_done[1] = 0; for (i = 0; i < icd->nr; i++) { - io_u = td->io_event(td, i); + io_u = td->io_ops->event(td, i); io_completed(td, io_u, icd); put_io_u(td, io_u); @@ -660,11 +660,11 @@ static void cleanup_pending_aio(struct thread_data *td) /* * now cancel remaining active events */ - if (td->io_cancel) { + if (td->io_ops->cancel) { list_for_each_safe(entry, n, &td->io_u_busylist) { io_u = list_entry(entry, struct io_u, list); - r = td->io_cancel(td, io_u); + r = td->io_ops->cancel(td, io_u); if (!r) put_io_u(td, io_u); } @@ -749,7 +749,7 @@ static void do_verify(struct thread_data *td) break; } - v_io_u = td->io_event(td, 0); + v_io_u = td->io_ops->event(td, 0); icd.nr = 1; icd.error = 0; io_completed(td, v_io_u, &icd); @@ -895,32 +895,12 @@ static void do_io(struct thread_data *td) } } -static void cleanup_io(struct thread_data *td) -{ - if (td->io_cleanup) - td->io_cleanup(td); -} - static int init_io(struct thread_data *td) { - if (td->io_engine == FIO_SYNCIO) - return fio_syncio_init(td); - else if (td->io_engine == FIO_MMAPIO) - return fio_mmapio_init(td); - else if (td->io_engine == FIO_LIBAIO) - return fio_libaio_init(td); - else if (td->io_engine == FIO_POSIXAIO) - return fio_posixaio_init(td); - else if (td->io_engine == FIO_SGIO) - return fio_sgio_init(td); - else if (td->io_engine == FIO_SPLICEIO) - return fio_spliceio_init(td); - else if (td->io_engine == FIO_CPUIO) - return fio_cpuio_init(td); - else { - log_err("bad io_engine %d\n", td->io_engine); - return 1; - } + if (td->io_ops->init) + return td->io_ops->init(td); + + return 0; } static void cleanup_io_u(struct thread_data *td) @@ -956,10 +936,10 @@ static int init_io_u(struct thread_data *td) int i, max_units; char *p; - if (td->io_engine == FIO_CPUIO) + if (td->io_ops->flags & FIO_CPUIO) return 0; - if (td->io_engine & FIO_SYNCIO) + if (td->io_ops->flags & FIO_SYNCIO) max_units = 1; else max_units = td->iodepth; @@ -1229,7 +1209,7 @@ static int setup_file(struct thread_data *td) struct stat st; int flags = 0; - if (td->io_engine == FIO_CPUIO) + if (td->io_ops->flags & FIO_CPUIO) return 0; if (stat(td->file_name, &st) == -1) { @@ -1282,10 +1262,10 @@ static int setup_file(struct thread_data *td) if (get_file_size(td)) return 1; - if (td->io_engine != FIO_MMAPIO) - return setup_file_plain(td); - else + if (td->io_ops->flags & FIO_MMAPIO) return setup_file_mmap(td); + else + return setup_file_plain(td); } static int switch_ioscheduler(struct thread_data *td) @@ -1338,7 +1318,7 @@ static int switch_ioscheduler(struct thread_data *td) static void clear_io_state(struct thread_data *td) { - if (td->io_engine == FIO_SYNCIO) + if (td->io_ops->flags & FIO_SYNCIO) lseek(td->fd, SEEK_SET, 0); td->last_pos = 0; @@ -1423,7 +1403,7 @@ static void *thread_main(void *data) clear_io_state(td); prune_io_piece_log(td); - if (td->io_engine == FIO_CPUIO) + if (td->io_ops->flags & FIO_CPUIO) do_cpuio(td); else do_io(td); @@ -1472,7 +1452,7 @@ err: } if (td->mmap) munmap(td->mmap, td->file_size); - cleanup_io(td); + close_ioengine(td); cleanup_io_u(td); td_set_runstate(td, TD_EXITED); return NULL; @@ -1738,7 +1718,7 @@ static void reap_threads(int *nr_running, int *t_rate, int *m_rate) for (i = 0, cputhreads = 0; i < thread_number; i++) { struct thread_data *td = &threads[i]; - if (td->io_engine == FIO_CPUIO) + if (td->io_ops->flags & FIO_CPUIO) cputhreads++; if (td->runstate != TD_EXITED) diff --git a/fio.h b/fio.h index ee47599b..64f3b2d1 100644 --- a/fio.h +++ b/fio.h @@ -124,14 +124,10 @@ enum fio_filetype { FIO_TYPE_CHAR, }; -enum fio_iotype { +enum fio_ioengine_flags { FIO_SYNCIO = 1 << 0, - FIO_MMAPIO = 1 << 1 | FIO_SYNCIO, - FIO_LIBAIO = 1 << 2, - FIO_POSIXAIO = 1 << 3, - FIO_SGIO = 1 << 4, - FIO_SPLICEIO = 1 << 5 | FIO_SYNCIO, - FIO_CPUIO = 1 << 6, + FIO_CPUIO = 1 << 1, + FIO_MMAPIO = 1 << 2, }; /* @@ -179,7 +175,6 @@ struct thread_data { unsigned int fsync_blocks; unsigned int start_delay; unsigned long timeout; - enum fio_iotype io_engine; unsigned int overwrite; unsigned int bw_avg_time; unsigned int loops; @@ -216,15 +211,7 @@ struct thread_data { * IO engine hooks, contains everything needed to submit an io_u * to any of the available IO engines. */ - void *io_data; - char io_engine_name[16]; - int (*io_prep)(struct thread_data *, struct io_u *); - int (*io_queue)(struct thread_data *, struct io_u *); - int (*io_getevents)(struct thread_data *, int, int, struct timespec *); - struct io_u *(*io_event)(struct thread_data *, int); - int (*io_cancel)(struct thread_data *, struct io_u *); - void (*io_cleanup)(struct thread_data *); - int (*io_sync)(struct thread_data *); + struct ioengine_ops *io_ops; /* * Current IO depth and list of free and busy io_u's. @@ -463,4 +450,30 @@ static inline void fio_sem_up(volatile int *sem) fprintf(stderr, ##args); \ } while (0) +struct ioengine_ops { + char name[16]; + int version; + int flags; + int (*init)(struct thread_data *); + int (*prep)(struct thread_data *, struct io_u *); + int (*queue)(struct thread_data *, struct io_u *); + int (*getevents)(struct thread_data *, int, int, struct timespec *); + struct io_u *(*event)(struct thread_data *, int); + int (*cancel)(struct thread_data *, struct io_u *); + void (*cleanup)(struct thread_data *); + int (*sync)(struct thread_data *); + void *data; + void *dlhandle; +}; + +#define FIO_IOOPS_VERSION 1 + +extern struct ioengine_ops *load_ioengine(struct thread_data *, char *); +extern void close_ioengine(struct thread_data *); + +/* + * Mark unused variables passed to ops functions as unused, to silence gcc + */ +#define fio_unused __attribute((__unused__)) + #endif diff --git a/init.c b/init.c index 1ffd6ff0..3eaf9b7f 100644 --- a/init.c +++ b/init.c @@ -124,7 +124,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num) if (td == &def_thread) return 0; - if (td->io_engine & FIO_SYNCIO) + if (td->io_ops->flags & FIO_SYNCIO) td->iodepth = 1; else { if (!td->iodepth) @@ -197,10 +197,10 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num) if (!terse_output) { if (!job_add_num) { - if (td->io_engine == FIO_CPUIO) + if (td->io_ops->flags & FIO_CPUIO) fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle); else - fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth); + fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_ops->name, td->iodepth); } else if (job_add_num == 1) fprintf(f_out, "...\n"); } @@ -594,36 +594,12 @@ static int str_mem_cb(struct thread_data *td, char *mem) static int str_ioengine_cb(struct thread_data *td, char *str) { - if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) || - !strncmp(str, "libaio", 6)) { - strcpy(td->io_engine_name, "libaio"); - td->io_engine = FIO_LIBAIO; - return 0; - } else if (!strncmp(str, "posixaio", 8)) { - strcpy(td->io_engine_name, "posixaio"); - td->io_engine = FIO_POSIXAIO; - return 0; - } else if (!strncmp(str, "sync", 4)) { - strcpy(td->io_engine_name, "sync"); - td->io_engine = FIO_SYNCIO; - return 0; - } else if (!strncmp(str, "mmap", 4)) { - strcpy(td->io_engine_name, "mmap"); - td->io_engine = FIO_MMAPIO; - return 0; - } else if (!strncmp(str, "sgio", 4)) { - strcpy(td->io_engine_name, "sgio"); - td->io_engine = FIO_SGIO; - return 0; - } else if (!strncmp(str, "splice", 6)) { - strcpy(td->io_engine_name, "splice"); - td->io_engine = FIO_SPLICEIO; - return 0; - } else if (!strncmp(str, "cpu", 3)) { - strcpy(td->io_engine_name, "cpu"); - td->io_engine = FIO_CPUIO; + if (!str) + str = DEF_IO_ENGINE_NAME; + + td->io_ops = load_ioengine(td, str); + if (td->io_ops) return 0; - } log_err("fio: ioengine: { linuxaio, aio, libaio }, posixaio, sync, mmap, sgio, splice, cpu\n"); return 1; @@ -1002,8 +978,6 @@ static int fill_def_thread(void) def_thread.bs = DEF_BS; def_thread.min_bs = -1; def_thread.max_bs = -1; - def_thread.io_engine = DEF_IO_ENGINE; - strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME); def_thread.odirect = DEF_ODIRECT; def_thread.ratecycle = DEF_RATE_CYCLE; def_thread.sequential = DEF_SEQUENTIAL; diff --git a/ioengines.c b/ioengines.c index 01492bcf..723f310d 100644 --- a/ioengines.c +++ b/ioengines.c @@ -12,923 +12,43 @@ #include #include #include -#include -#include -#include #include -#include -#include +#include #include "fio.h" #include "os.h" -static int fill_timespec(struct timespec *ts) +struct ioengine_ops *load_ioengine(struct thread_data *td, char *name) { -#ifdef _POSIX_TIMERS - if (!clock_gettime(CLOCK_MONOTONIC, ts)) - return 0; + char engine[16], engine_lib[256]; + struct ioengine_ops *ops; + void *dlhandle; - perror("clock_gettime"); -#endif - return 1; -} - -static unsigned long long ts_utime_since_now(struct timespec *t) -{ - long long sec, nsec; - struct timespec now; - - if (fill_timespec(&now)) - return 0; - - sec = now.tv_sec - t->tv_sec; - nsec = now.tv_nsec - t->tv_nsec; - if (sec > 0 && nsec < 0) { - sec--; - nsec += 1000000000; - } - - sec *= 1000000; - nsec /= 1000; - return sec + nsec; -} - -static int fio_io_sync(struct thread_data *td) -{ - return fsync(td->fd); -} - -#ifdef FIO_HAVE_LIBAIO - -#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj) - -struct libaio_data { - io_context_t aio_ctx; - struct io_event *aio_events; -}; - -static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u) -{ - if (io_u->ddir == DDIR_READ) - io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); - else - io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset); - - return 0; -} - -static struct io_u *fio_libaio_event(struct thread_data *td, int event) -{ - struct libaio_data *ld = td->io_data; - - return ev_to_iou(ld->aio_events + event); -} - -static int fio_libaio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct libaio_data *ld = td->io_data; - long r; - - do { - r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t); - if (r == -EAGAIN) { - usleep(100); - continue; - } else if (r == -EINTR) - continue; - else - break; - } while (1); - - return (int) r; -} - -static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_data; - struct iocb *iocb = &io_u->iocb; - long ret; - - do { - ret = io_submit(ld->aio_ctx, 1, &iocb); - if (ret == 1) - return 0; - else if (ret == -EAGAIN) - usleep(100); - else if (ret == -EINTR) - continue; - else - break; - } while (1); - - return (int) ret; - -} - -static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u) -{ - struct libaio_data *ld = td->io_data; - - return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events); -} - -static void fio_libaio_cleanup(struct thread_data *td) -{ - struct libaio_data *ld = td->io_data; - - if (ld) { - io_destroy(ld->aio_ctx); - if (ld->aio_events) - free(ld->aio_events); - - free(ld); - td->io_data = NULL; - } -} - -int fio_libaio_init(struct thread_data *td) -{ - struct libaio_data *ld = malloc(sizeof(*ld)); - - memset(ld, 0, sizeof(*ld)); - if (io_queue_init(td->iodepth, &ld->aio_ctx)) { - td_verror(td, errno); - return 1; - } - - td->io_prep = fio_libaio_io_prep; - td->io_queue = fio_libaio_queue; - td->io_getevents = fio_libaio_getevents; - td->io_event = fio_libaio_event; - td->io_cancel = fio_libaio_cancel; - td->io_cleanup = fio_libaio_cleanup; - td->io_sync = fio_io_sync; - - ld->aio_events = malloc(td->iodepth * sizeof(struct io_event)); - td->io_data = ld; - return 0; -} - -#else /* FIO_HAVE_LIBAIO */ - -int fio_libaio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_LIBAIO */ - -#ifdef FIO_HAVE_POSIXAIO - -struct posixaio_data { - struct io_u **aio_events; -}; - -static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u) -{ - int r = aio_cancel(td->fd, &io_u->aiocb); - - if (r == 1 || r == AIO_CANCELED) - return 0; - - return 1; -} - -static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct aiocb *aiocb = &io_u->aiocb; - - aiocb->aio_fildes = td->fd; - aiocb->aio_buf = io_u->buf; - aiocb->aio_nbytes = io_u->buflen; - aiocb->aio_offset = io_u->offset; - - io_u->seen = 0; - return 0; -} - -static int fio_posixaio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct posixaio_data *pd = td->io_data; - struct list_head *entry; - struct timespec start; - int r, have_timeout = 0; - - if (t && !fill_timespec(&start)) - have_timeout = 1; - - r = 0; -restart: - list_for_each(entry, &td->io_u_busylist) { - struct io_u *io_u = list_entry(entry, struct io_u, list); - int err; - - if (io_u->seen) - continue; - - err = aio_error(&io_u->aiocb); - switch (err) { - default: - io_u->error = err; - case ECANCELED: - case 0: - pd->aio_events[r++] = io_u; - io_u->seen = 1; - break; - case EINPROGRESS: - break; - } - - if (r >= max) - break; - } - - if (r >= min) - return r; - - if (have_timeout) { - unsigned long long usec; - - usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000); - if (ts_utime_since_now(&start) > usec) - return r; - } - - /* - * hrmpf, we need to wait for more. we should use aio_suspend, for - * now just sleep a little and recheck status of busy-and-not-seen - */ - usleep(1000); - goto restart; -} - -static struct io_u *fio_posixaio_event(struct thread_data *td, int event) -{ - struct posixaio_data *pd = td->io_data; - - return pd->aio_events[event]; -} - -static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct aiocb *aiocb = &io_u->aiocb; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = aio_read(aiocb); - else - ret = aio_write(aiocb); - - if (ret) - io_u->error = errno; - - return io_u->error; -} - -static void fio_posixaio_cleanup(struct thread_data *td) -{ - struct posixaio_data *pd = td->io_data; - - if (pd) { - free(pd->aio_events); - free(pd); - td->io_data = NULL; - } -} - -int fio_posixaio_init(struct thread_data *td) -{ - struct posixaio_data *pd = malloc(sizeof(*pd)); - - pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *)); - - td->io_prep = fio_posixaio_prep; - td->io_queue = fio_posixaio_queue; - td->io_getevents = fio_posixaio_getevents; - td->io_event = fio_posixaio_event; - td->io_cancel = fio_posixaio_cancel; - td->io_cleanup = fio_posixaio_cleanup; - td->io_sync = fio_io_sync; - - td->io_data = pd; - return 0; -} - -#else /* FIO_HAVE_POSIXAIO */ - -int fio_posixaio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_POSIXAIO */ - -struct syncio_data { - struct io_u *last_io_u; -}; - -static int fio_syncio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - assert(max <= 1); - - /* - * we can only have one finished io_u for sync io, since the depth - * is always 1 - */ - if (list_empty(&td->io_u_busylist)) - return 0; - - return 1; -} - -static struct io_u *fio_syncio_event(struct thread_data *td, int event) -{ - struct syncio_data *sd = td->io_data; - - assert(event == 0); - - return sd->last_io_u; -} - -static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u) -{ - if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) { - td_verror(td, errno); - return 1; - } - - return 0; -} - -static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct syncio_data *sd = td->io_data; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = read(td->fd, io_u->buf, io_u->buflen); - else - ret = write(td->fd, io_u->buf, io_u->buflen); - - if ((unsigned int) ret != io_u->buflen) { - if (ret > 0) { - io_u->resid = io_u->buflen - ret; - io_u->error = EIO; - } else - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - -static void fio_syncio_cleanup(struct thread_data *td) -{ - if (td->io_data) { - free(td->io_data); - td->io_data = NULL; - } -} - -int fio_syncio_init(struct thread_data *td) -{ - struct syncio_data *sd = malloc(sizeof(*sd)); - - td->io_prep = fio_syncio_prep; - td->io_queue = fio_syncio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_syncio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_io_sync; - - sd->last_io_u = NULL; - td->io_data = sd; - return 0; -} - -static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u) -{ - unsigned long long real_off = io_u->offset - td->file_offset; - struct syncio_data *sd = td->io_data; - - if (io_u->ddir == DDIR_READ) - memcpy(io_u->buf, td->mmap + real_off, io_u->buflen); - else - memcpy(td->mmap + real_off, io_u->buf, io_u->buflen); - - /* - * not really direct, but should drop the pages from the cache - */ - if (td->odirect) { - if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0) - io_u->error = errno; - if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0) - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - -static int fio_mmapio_sync(struct thread_data *td) -{ - return msync(td->mmap, td->file_size, MS_SYNC); -} - -int fio_mmapio_init(struct thread_data *td) -{ - struct syncio_data *sd = malloc(sizeof(*sd)); - - td->io_prep = NULL; - td->io_queue = fio_mmapio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_syncio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_mmapio_sync; - - sd->last_io_u = NULL; - td->io_data = sd; - return 0; -} - -#ifdef FIO_HAVE_SGIO - -struct sgio_cmd { - unsigned char cdb[10]; - int nr; -}; - -struct sgio_data { - struct sgio_cmd *cmds; - struct io_u **events; - unsigned int bs; -}; - -static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr, - struct io_u *io_u, int fs) -{ - struct sgio_cmd *sc = &sd->cmds[io_u->index]; - - memset(hdr, 0, sizeof(*hdr)); - memset(sc->cdb, 0, sizeof(sc->cdb)); - - hdr->interface_id = 'S'; - hdr->cmdp = sc->cdb; - hdr->cmd_len = sizeof(sc->cdb); - hdr->pack_id = io_u->index; - hdr->usr_ptr = io_u; - - if (fs) { - hdr->dxferp = io_u->buf; - hdr->dxfer_len = io_u->buflen; - } -} - -static int fio_sgio_getevents(struct thread_data *td, int min, int max, - struct timespec *t) -{ - struct sgio_data *sd = td->io_data; - struct pollfd pfd = { .fd = td->fd, .events = POLLIN }; - void *buf = malloc(max * sizeof(struct sg_io_hdr)); - int left = max, ret, events, i, r = 0, fl = 0; - - /* - * don't block for !events - */ - if (!min) { - fl = fcntl(td->fd, F_GETFL); - fcntl(td->fd, F_SETFL, fl | O_NONBLOCK); - } - - while (left) { - do { - if (!min) - break; - poll(&pfd, 1, -1); - if (pfd.revents & POLLIN) - break; - } while (1); - - ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr)); - if (ret < 0) { - if (errno == EAGAIN) - break; - td_verror(td, errno); - r = -1; - break; - } else if (!ret) - break; - - events = ret / sizeof(struct sg_io_hdr); - left -= events; - r += events; - - for (i = 0; i < events; i++) { - struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i; - - sd->events[i] = hdr->usr_ptr; - } - } - - if (!min) - fcntl(td->fd, F_SETFL, fl); - - free(buf); - return r; -} - -static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u) -{ - struct sgio_data *sd = td->io_data; - struct sg_io_hdr *hdr = &io_u->hdr; - - sd->events[0] = io_u; - - return ioctl(td->fd, SG_IO, hdr); -} - -static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - ret = write(td->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return errno; - - if (sync) { - ret = read(td->fd, hdr, sizeof(*hdr)); - if (ret < 0) - return errno; - } - - return 0; -} - -static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync) -{ - if (td->filetype == FIO_TYPE_BD) - return fio_sgio_ioctl_doio(td, io_u); - - return fio_sgio_rw_doio(td, io_u, sync); -} - -static int fio_sgio_sync(struct thread_data *td) -{ - struct sgio_data *sd = td->io_data; - struct sg_io_hdr *hdr; - struct io_u *io_u; - int ret; - - io_u = __get_io_u(td); - if (!io_u) - return ENOMEM; - - hdr = &io_u->hdr; - sgio_hdr_init(sd, hdr, io_u, 0); - hdr->dxfer_direction = SG_DXFER_NONE; - - hdr->cmdp[0] = 0x35; - - ret = fio_sgio_doio(td, io_u, 1); - put_io_u(td, io_u); - return ret; -} - -static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - struct sgio_data *sd = td->io_data; - int nr_blocks, lba; - - if (io_u->buflen & (sd->bs - 1)) { - log_err("read/write not sector aligned\n"); - return EINVAL; - } - - sgio_hdr_init(sd, hdr, io_u, 1); - - if (io_u->ddir == DDIR_READ) { - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->cmdp[0] = 0x28; - } else { - hdr->dxfer_direction = SG_DXFER_TO_DEV; - hdr->cmdp[0] = 0x2a; - } - - nr_blocks = io_u->buflen / sd->bs; - lba = io_u->offset / sd->bs; - hdr->cmdp[2] = (lba >> 24) & 0xff; - hdr->cmdp[3] = (lba >> 16) & 0xff; - hdr->cmdp[4] = (lba >> 8) & 0xff; - hdr->cmdp[5] = lba & 0xff; - hdr->cmdp[7] = (nr_blocks >> 8) & 0xff; - hdr->cmdp[8] = nr_blocks & 0xff; - return 0; -} - -static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct sg_io_hdr *hdr = &io_u->hdr; - int ret; - - ret = fio_sgio_doio(td, io_u, 0); - - if (ret < 0) - io_u->error = errno; - else if (hdr->status) { - io_u->resid = hdr->resid; - io_u->error = EIO; - } - - return io_u->error; -} - -static struct io_u *fio_sgio_event(struct thread_data *td, int event) -{ - struct sgio_data *sd = td->io_data; - - return sd->events[event]; -} - -static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs) -{ - struct sgio_data *sd = td->io_data; - struct io_u *io_u; - struct sg_io_hdr *hdr; - unsigned char buf[8]; - int ret; - - io_u = __get_io_u(td); - assert(io_u); - - hdr = &io_u->hdr; - sgio_hdr_init(sd, hdr, io_u, 0); - memset(buf, 0, sizeof(buf)); - - hdr->cmdp[0] = 0x25; - hdr->dxfer_direction = SG_DXFER_FROM_DEV; - hdr->dxferp = buf; - hdr->dxfer_len = sizeof(buf); - - ret = fio_sgio_doio(td, io_u, 1); - if (ret) { - put_io_u(td, io_u); - return ret; - } - - *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7]; - put_io_u(td, io_u); - return 0; -} - -int fio_sgio_init(struct thread_data *td) -{ - struct sgio_data *sd; - unsigned int bs; - int ret; - - sd = malloc(sizeof(*sd)); - sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd)); - sd->events = malloc(td->iodepth * sizeof(struct io_u *)); - td->io_data = sd; - - if (td->filetype == FIO_TYPE_BD) { - if (ioctl(td->fd, BLKSSZGET, &bs) < 0) { - td_verror(td, errno); - return 1; - } - } else if (td->filetype == FIO_TYPE_CHAR) { - int version; - - if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) { - td_verror(td, errno); - return 1; - } - - ret = fio_sgio_get_bs(td, &bs); - if (ret) - return ret; - } else { - log_err("ioengine sgio only works on block devices\n"); - return 1; - } - - sd->bs = bs; - - td->io_prep = fio_sgio_prep; - td->io_queue = fio_sgio_queue; - - if (td->filetype == FIO_TYPE_BD) - td->io_getevents = fio_syncio_getevents; - else - td->io_getevents = fio_sgio_getevents; - - td->io_event = fio_sgio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_syncio_cleanup; - td->io_sync = fio_sgio_sync; + strcpy(engine, name); /* - * we want to do it, regardless of whether odirect is set or not + * linux libaio has alias names, so convert to what we want */ - td->override_sync = 1; - return 0; -} - -#else /* FIO_HAVE_SGIO */ + if (!strncmp(engine, "linuxaio", 8) || !strncmp(engine, "aio", 3)) + strcpy(engine, "libaio"); -int fio_sgio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_SGIO */ + sprintf(engine_lib, "/usr/local/lib/fio/fio-engine-%s.o", engine); + dlerror(); + dlhandle = dlopen(engine_lib, RTLD_LAZY); + if (!dlhandle) + printf("bla: %s\n", dlerror()); -#ifdef FIO_HAVE_SPLICE -struct spliceio_data { - struct io_u *last_io_u; - int pipe[2]; -}; - -static struct io_u *fio_spliceio_event(struct thread_data *td, int event) -{ - struct spliceio_data *sd = td->io_data; + ops = dlsym(dlhandle, "ioengine"); + if (!ops) + printf("get ops failed\n"); - assert(event == 0); - - return sd->last_io_u; + ops->dlhandle = dlhandle; + return ops; } -/* - * For splice reading, we unfortunately cannot (yet) vmsplice the other way. - * So just splice the data from the file into the pipe, and use regular - * read to fill the buffer. Doesn't make a lot of sense, but... - */ -static int fio_splice_read(struct thread_data *td, struct io_u *io_u) +void close_ioengine(struct thread_data *td) { - struct spliceio_data *sd = td->io_data; - int ret, ret2, buflen; - off_t offset; - void *p; - - offset = io_u->offset; - buflen = io_u->buflen; - p = io_u->buf; - while (buflen) { - int this_len = buflen; - - if (this_len > SPLICE_DEF_SIZE) - this_len = SPLICE_DEF_SIZE; - - ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE); - if (ret < 0) { - if (errno == ENODATA || errno == EAGAIN) - continue; - - return errno; - } - - buflen -= ret; - - while (ret) { - ret2 = read(sd->pipe[0], p, ret); - if (ret2 < 0) - return errno; - - ret -= ret2; - p += ret2; - } - } - - return io_u->buflen; -} - -/* - * For splice writing, we can vmsplice our data buffer directly into a - * pipe and then splice that to a file. - */ -static int fio_splice_write(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_data; - struct iovec iov[1] = { - { - .iov_base = io_u->buf, - .iov_len = io_u->buflen, - } - }; - struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, }; - off_t off = io_u->offset; - int ret, ret2; - - while (iov[0].iov_len) { - if (poll(&pfd, 1, -1) < 0) - return errno; - - ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK); - if (ret < 0) - return errno; - - iov[0].iov_len -= ret; - iov[0].iov_base += ret; - - while (ret) { - ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0); - if (ret2 < 0) - return errno; - - ret -= ret2; - } - } - - return io_u->buflen; -} - -static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u) -{ - struct spliceio_data *sd = td->io_data; - int ret; - - if (io_u->ddir == DDIR_READ) - ret = fio_splice_read(td, io_u); - else - ret = fio_splice_write(td, io_u); - - if ((unsigned int) ret != io_u->buflen) { - if (ret > 0) { - io_u->resid = io_u->buflen - ret; - io_u->error = ENODATA; - } else - io_u->error = errno; - } - - if (!io_u->error) - sd->last_io_u = io_u; - - return io_u->error; -} - -static void fio_spliceio_cleanup(struct thread_data *td) -{ - struct spliceio_data *sd = td->io_data; - - if (sd) { - close(sd->pipe[0]); - close(sd->pipe[1]); - free(sd); - td->io_data = NULL; - } -} - -int fio_spliceio_init(struct thread_data *td) -{ - struct spliceio_data *sd = malloc(sizeof(*sd)); - - td->io_queue = fio_spliceio_queue; - td->io_getevents = fio_syncio_getevents; - td->io_event = fio_spliceio_event; - td->io_cancel = NULL; - td->io_cleanup = fio_spliceio_cleanup; - td->io_sync = fio_io_sync; - - sd->last_io_u = NULL; - if (pipe(sd->pipe) < 0) { - td_verror(td, errno); - free(sd); - return 1; - } - - td->io_data = sd; - return 0; -} - -#else /* FIO_HAVE_SPLICE */ - -int fio_spliceio_init(struct thread_data *td) -{ - return EINVAL; -} - -#endif /* FIO_HAVE_SPLICE */ - -int fio_cpuio_init(struct thread_data *td) -{ - if (!td->cpuload) { - td_vmsg(td, EINVAL, "cpu thread needs rate"); - return 1; - } else if (td->cpuload > 100) - td->cpuload = 100; - - td->read_iolog = td->write_iolog = 0; - td->fd = -1; + if (td->io_ops->cleanup) + td->io_ops->cleanup(td); - return 0; + dlclose(td->io_ops->dlhandle); } diff --git a/os.h b/os.h index 68660b6d..b44d34c6 100644 --- a/os.h +++ b/os.h @@ -47,13 +47,4 @@ #define OS_O_DIRECT O_DIRECT #endif -struct thread_data; -extern int fio_libaio_init(struct thread_data *); -extern int fio_posixaio_init(struct thread_data *); -extern int fio_syncio_init(struct thread_data *); -extern int fio_mmapio_init(struct thread_data *); -extern int fio_sgio_init(struct thread_data *); -extern int fio_spliceio_init(struct thread_data *); -extern int fio_cpuio_init(struct thread_data *); - #endif -- 2.25.1