#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <time.h>
#include <string.h>
-#include <sys/mman.h>
-#include <sys/poll.h>
-#include "fio.h"
-#include "os.h"
+#include <dlfcn.h>
+#include <assert.h>
-static int fill_timespec(struct timespec *ts)
-{
-#ifdef _POSIX_TIMERS
- if (!clock_gettime(CLOCK_MONOTONIC, ts))
- return 0;
+#include "fio.h"
+#include "diskutil.h"
- perror("clock_gettime");
-#endif
- return 1;
-}
+static FLIST_HEAD(engine_list);
-static unsigned long long ts_utime_since_now(struct timespec *t)
+static int check_engine_ops(struct ioengine_ops *ops)
{
- long long sec, nsec;
- struct timespec now;
-
- if (fill_timespec(&now))
- return 0;
-
- sec = now.tv_sec - t->tv_sec;
- nsec = now.tv_nsec - t->tv_nsec;
- if (sec > 0 && nsec < 0) {
- sec--;
- nsec += 1000000000;
+ if (ops->version != FIO_IOOPS_VERSION) {
+ log_err("bad ioops version %d (want %d)\n", ops->version,
+ FIO_IOOPS_VERSION);
+ return 1;
}
- sec *= 1000000;
- nsec /= 1000;
- return sec + nsec;
-}
-
-static int fio_io_sync(struct thread_data *td)
-{
- return fsync(td->fd);
-}
-
-#ifdef FIO_HAVE_LIBAIO
-
-#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
-
-struct libaio_data {
- io_context_t aio_ctx;
- struct io_event *aio_events;
-};
-
-static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (io_u->ddir == DDIR_READ)
- io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
- else
- io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-
- return 0;
-}
-
-static struct io_u *fio_libaio_event(struct thread_data *td, int event)
-{
- struct libaio_data *ld = td->io_data;
-
- return ev_to_iou(ld->aio_events + event);
-}
-
-static int fio_libaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct libaio_data *ld = td->io_data;
- int r;
-
- do {
- r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
- if (r == -EAGAIN) {
- usleep(100);
- continue;
- } else if (r == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return r;
-}
-
-static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
- struct iocb *iocb = &io_u->iocb;
- int ret;
-
- do {
- ret = io_submit(ld->aio_ctx, 1, &iocb);
- if (ret == 1)
- return 0;
- else if (ret == -EAGAIN)
- usleep(100);
- else if (ret == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return ret;
-
-}
-
-static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
-
- return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
-}
-
-static void fio_libaio_cleanup(struct thread_data *td)
-{
- struct libaio_data *ld = td->io_data;
-
- if (ld) {
- io_destroy(ld->aio_ctx);
- if (ld->aio_events)
- free(ld->aio_events);
-
- free(ld);
- td->io_data = NULL;
+ if (!ops->queue) {
+ log_err("%s: no queue handler\n", ops->name);
+ return 1;
}
-}
-int fio_libaio_init(struct thread_data *td)
-{
- struct libaio_data *ld = malloc(sizeof(*ld));
+ /*
+ * sync engines only need a ->queue()
+ */
+ if (ops->flags & FIO_SYNCIO)
+ return 0;
- memset(ld, 0, sizeof(*ld));
- if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
- td_verror(td, errno);
+ if (!ops->event) {
+ log_err("%s: no event handler\n", ops->name);
+ return 1;
+ }
+ if (!ops->getevents) {
+ log_err("%s: no getevents handler\n", ops->name);
+ return 1;
+ }
- td->io_prep = fio_libaio_io_prep;
- td->io_queue = fio_libaio_queue;
- td->io_getevents = fio_libaio_getevents;
- td->io_event = fio_libaio_event;
- td->io_cancel = fio_libaio_cancel;
- td->io_cleanup = fio_libaio_cleanup;
- td->io_sync = fio_io_sync;
-
- ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
- td->io_data = ld;
return 0;
}
-#else /* FIO_HAVE_LIBAIO */
-
-int fio_libaio_init(struct thread_data *td)
+void unregister_ioengine(struct ioengine_ops *ops)
{
- return EINVAL;
+ dprint(FD_IO, "ioengine %s unregistered\n", ops->name);
+ flist_del(&ops->list);
+ INIT_FLIST_HEAD(&ops->list);
}
-#endif /* FIO_HAVE_LIBAIO */
-
-#ifdef FIO_HAVE_POSIXAIO
-
-struct posixaio_data {
- struct io_u **aio_events;
-};
-
-static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+void register_ioengine(struct ioengine_ops *ops)
{
- int r = aio_cancel(td->fd, &io_u->aiocb);
-
- if (r == 1 || r == AIO_CANCELED)
- return 0;
-
- return 1;
+ dprint(FD_IO, "ioengine %s registered\n", ops->name);
+ INIT_FLIST_HEAD(&ops->list);
+ flist_add_tail(&ops->list, &engine_list);
}
-static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
+static struct ioengine_ops *find_ioengine(const char *name)
{
- struct aiocb *aiocb = &io_u->aiocb;
+ struct ioengine_ops *ops;
+ struct flist_head *entry;
- aiocb->aio_fildes = td->fd;
- aiocb->aio_buf = io_u->buf;
- aiocb->aio_nbytes = io_u->buflen;
- aiocb->aio_offset = io_u->offset;
+ flist_for_each(entry, &engine_list) {
+ ops = flist_entry(entry, struct ioengine_ops, list);
+ if (!strcmp(name, ops->name))
+ return ops;
+ }
- io_u->seen = 0;
- return 0;
+ return NULL;
}
-static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
+static struct ioengine_ops *dlopen_ioengine(struct thread_data *td,
+ const char *engine_lib)
{
- struct posixaio_data *pd = td->io_data;
- struct list_head *entry;
- struct timespec start;
- int r, have_timeout = 0;
+ struct ioengine_ops *ops;
+ void *dlhandle;
- if (t && !fill_timespec(&start))
- have_timeout = 1;
+ dprint(FD_IO, "dload engine %s\n", engine_lib);
- r = 0;
-restart:
- list_for_each(entry, &td->io_u_busylist) {
- struct io_u *io_u = list_entry(entry, struct io_u, list);
- int err;
-
- if (io_u->seen)
- continue;
-
- err = aio_error(&io_u->aiocb);
- switch (err) {
- default:
- io_u->error = err;
- case ECANCELED:
- case 0:
- pd->aio_events[r++] = io_u;
- io_u->seen = 1;
- break;
- case EINPROGRESS:
- break;
- }
-
- if (r >= max)
- break;
- }
-
- if (r >= min)
- return r;
-
- if (have_timeout) {
- unsigned long long usec;
-
- usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
- if (ts_utime_since_now(&start) > usec)
- return r;
+ dlerror();
+ dlhandle = dlopen(engine_lib, RTLD_LAZY);
+ if (!dlhandle) {
+ td_vmsg(td, -1, dlerror(), "dlopen");
+ return NULL;
}
/*
- * hrmpf, we need to wait for more. we should use aio_suspend, for
- * now just sleep a little and recheck status of busy-and-not-seen
+ * Unlike the included modules, external engines should have a
+ * non-static ioengine structure that we can reference.
*/
- usleep(1000);
- goto restart;
-}
-
-static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
-{
- struct posixaio_data *pd = td->io_data;
-
- return pd->aio_events[event];
-}
-
-static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct aiocb *aiocb = &io_u->aiocb;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = aio_read(aiocb);
- else
- ret = aio_write(aiocb);
-
- if (ret)
- io_u->error = errno;
-
- return io_u->error;
-}
-
-static void fio_posixaio_cleanup(struct thread_data *td)
-{
- struct posixaio_data *pd = td->io_data;
-
- if (pd) {
- free(pd->aio_events);
- free(pd);
- td->io_data = NULL;
+ ops = dlsym(dlhandle, "ioengine");
+ if (!ops) {
+ td_vmsg(td, -1, dlerror(), "dlsym");
+ dlclose(dlhandle);
+ return NULL;
}
-}
-int fio_posixaio_init(struct thread_data *td)
-{
- struct posixaio_data *pd = malloc(sizeof(*pd));
-
- pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
-
- td->io_prep = fio_posixaio_prep;
- td->io_queue = fio_posixaio_queue;
- td->io_getevents = fio_posixaio_getevents;
- td->io_event = fio_posixaio_event;
- td->io_cancel = fio_posixaio_cancel;
- td->io_cleanup = fio_posixaio_cleanup;
- td->io_sync = fio_io_sync;
-
- td->io_data = pd;
- return 0;
+ ops->dlhandle = dlhandle;
+ return ops;
}
-#else /* FIO_HAVE_POSIXAIO */
-
-int fio_posixaio_init(struct thread_data *td)
+struct ioengine_ops *load_ioengine(struct thread_data *td, const char *name)
{
- return EINVAL;
-}
+ struct ioengine_ops *ops, *ret;
+ char engine[16];
-#endif /* FIO_HAVE_POSIXAIO */
+ dprint(FD_IO, "load ioengine %s\n", name);
-struct syncio_data {
- struct io_u *last_io_u;
-};
-
-static int fio_syncio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- assert(max <= 1);
+ strncpy(engine, name, sizeof(engine) - 1);
/*
- * we can only have one finished io_u for sync io, since the depth
- * is always 1
+ * linux libaio has alias names, so convert to what we want
*/
- if (list_empty(&td->io_u_busylist))
- return 0;
-
- return 1;
-}
+ if (!strncmp(engine, "linuxaio", 8) || !strncmp(engine, "aio", 3))
+ strcpy(engine, "libaio");
-static struct io_u *fio_syncio_event(struct thread_data *td, int event)
-{
- struct syncio_data *sd = td->io_data;
+ ops = find_ioengine(engine);
+ if (!ops)
+ ops = dlopen_ioengine(td, name);
- assert(event == 0);
+ if (!ops) {
+ log_err("fio: engine %s not loadable\n", name);
+ return NULL;
+ }
- return sd->last_io_u;
-}
+ /*
+ * Check that the required methods are there.
+ */
+ if (check_engine_ops(ops))
+ return NULL;
-static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
- td_verror(td, errno);
- return 1;
- }
+ ret = malloc(sizeof(*ret));
+ memcpy(ret, ops, sizeof(*ret));
+ ret->data = NULL;
- return 0;
+ return ret;
}
-static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
+void close_ioengine(struct thread_data *td)
{
- struct syncio_data *sd = td->io_data;
- int ret;
+ dprint(FD_IO, "close ioengine %s\n", td->io_ops->name);
- if (io_u->ddir == DDIR_READ)
- ret = read(td->fd, io_u->buf, io_u->buflen);
- else
- ret = write(td->fd, io_u->buf, io_u->buflen);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = EIO;
- } else
- io_u->error = errno;
+ if (td->io_ops->cleanup) {
+ td->io_ops->cleanup(td);
+ td->io_ops->data = NULL;
}
- if (!io_u->error)
- sd->last_io_u = io_u;
+ if (td->io_ops->dlhandle)
+ dlclose(td->io_ops->dlhandle);
- return io_u->error;
+ free(td->io_ops);
+ td->io_ops = NULL;
}
-static void fio_syncio_cleanup(struct thread_data *td)
+int td_io_prep(struct thread_data *td, struct io_u *io_u)
{
- if (td->io_data) {
- free(td->io_data);
- td->io_data = NULL;
- }
-}
-
-int fio_syncio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = fio_syncio_prep;
- td->io_queue = fio_syncio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
- return 0;
-}
+ dprint_io_u(io_u, "prep");
+ fio_ro_check(td, io_u);
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
-{
- unsigned long long real_off = io_u->offset - td->file_offset;
- struct syncio_data *sd = td->io_data;
+ lock_file(td, io_u->file, io_u->ddir);
- if (io_u->ddir == DDIR_READ)
- memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
- else
- memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
+ if (td->io_ops->prep) {
+ int ret = td->io_ops->prep(td, io_u);
- /*
- * not really direct, but should drop the pages from the cache
- */
- if (td->odirect) {
- if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
- io_u->error = errno;
- if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
- io_u->error = errno;
+ dprint(FD_IO, "->prep(%p)=%d\n", io_u, ret);
+ if (ret)
+ unlock_file(td, io_u->file);
+ return ret;
}
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static int fio_mmapio_sync(struct thread_data *td)
-{
- return msync(td->mmap, td->file_size, MS_SYNC);
-}
-
-int fio_mmapio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = NULL;
- td->io_queue = fio_mmapio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_mmapio_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
return 0;
}
-#ifdef FIO_HAVE_SGIO
-
-struct sgio_cmd {
- unsigned char cdb[10];
- int nr;
-};
-
-struct sgio_data {
- struct sgio_cmd *cmds;
- struct io_u **events;
- unsigned int bs;
-};
-
-static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
- struct io_u *io_u, int fs)
+int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max,
+ struct timespec *t)
{
- struct sgio_cmd *sc = &sd->cmds[io_u->index];
-
- memset(hdr, 0, sizeof(*hdr));
- memset(sc->cdb, 0, sizeof(sc->cdb));
+ int r = 0;
- hdr->interface_id = 'S';
- hdr->cmdp = sc->cdb;
- hdr->cmd_len = sizeof(sc->cdb);
- hdr->pack_id = io_u->index;
- hdr->usr_ptr = io_u;
-
- if (fs) {
- hdr->dxferp = io_u->buf;
- hdr->dxfer_len = io_u->buflen;
+ if (min > 0 && td->io_ops->commit) {
+ r = td->io_ops->commit(td);
+ if (r < 0)
+ goto out;
}
-}
+ if (max > td->cur_depth)
+ max = td->cur_depth;
+ if (min > max)
+ max = min;
-static int fio_sgio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct sgio_data *sd = td->io_data;
- struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
- void *buf = malloc(max * sizeof(struct sg_io_hdr));
- int left = max, ret, events, i, r = 0, fl = 0;
-
- /*
- * don't block for !events
- */
- if (!min) {
- fl = fcntl(td->fd, F_GETFL);
- fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
- }
-
- while (left) {
- do {
- if (!min)
- break;
- poll(&pfd, 1, -1);
- if (pfd.revents & POLLIN)
- break;
- } while (1);
-
- ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
- if (ret < 0) {
- if (errno == EAGAIN)
- break;
- td_verror(td, errno);
- r = -1;
- break;
- } else if (!ret)
- break;
-
- events = ret / sizeof(struct sg_io_hdr);
- left -= events;
- r += events;
-
- for (i = 0; i < events; i++) {
- struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
-
- sd->events[i] = hdr->usr_ptr;
- }
- }
-
- if (!min)
- fcntl(td->fd, F_SETFL, fl);
+ r = 0;
+ if (max && td->io_ops->getevents)
+ r = td->io_ops->getevents(td, min, max, t);
+out:
+ if (r >= 0)
+ io_u_mark_complete(td, r);
+ else
+ td_verror(td, r, "get_events");
- free(buf);
+ dprint(FD_IO, "getevents: %d\n", r);
return r;
}
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+int td_io_queue(struct thread_data *td, struct io_u *io_u)
{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr = &io_u->hdr;
+ int ret;
- sd->events[0] = io_u;
+ dprint_io_u(io_u, "queue");
+ fio_ro_check(td, io_u);
- return ioctl(td->fd, SG_IO, hdr);
-}
+ assert((io_u->flags & IO_U_F_FLIGHT) == 0);
+ io_u->flags |= IO_U_F_FLIGHT;
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
+ assert(fio_file_open(io_u->file));
- ret = write(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
+ io_u->error = 0;
+ io_u->resid = 0;
- if (sync) {
- ret = read(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
- }
-
- return 0;
-}
+ if (td->io_ops->flags & FIO_SYNCIO) {
+ if (fio_fill_issue_time(td))
+ fio_gettime(&io_u->issue_time, NULL);
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- if (td->filetype == FIO_TYPE_BD)
- return fio_sgio_ioctl_doio(td, io_u);
+ /*
+ * only used for iolog
+ */
+ if (td->o.read_iolog_file)
+ memcpy(&td->last_issue, &io_u->issue_time,
+ sizeof(struct timeval));
+ }
- return fio_sgio_rw_doio(td, io_u, sync);
-}
+ if (!ddir_sync(io_u->ddir))
+ td->io_issues[io_u->ddir]++;
-static int fio_sgio_sync(struct thread_data *td)
-{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr;
- struct io_u *io_u;
- int ret;
+ ret = td->io_ops->queue(td, io_u);
- io_u = __get_io_u(td);
- if (!io_u)
- return ENOMEM;
+ unlock_file(td, io_u->file);
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- hdr->dxfer_direction = SG_DXFER_NONE;
+ /*
+ * Add warning for O_DIRECT so that users have an easier time
+ * spotting potentially bad alignment. If this triggers for the first
+ * IO, then it's likely an alignment problem or because the host fs
+ * does not support O_DIRECT
+ */
+ if (io_u->error == EINVAL && td->io_issues[io_u->ddir] == 1 &&
+ td->o.odirect) {
+ log_info("fio: first direct IO errored. File system may not "
+ "support direct IO, or iomem_align= is bad.\n");
+ }
- hdr->cmdp[0] = 0x35;
+ if (!td->io_ops->commit) {
+ io_u_mark_submit(td, 1);
+ io_u_mark_complete(td, 1);
+ }
- ret = fio_sgio_doio(td, io_u, 1);
- put_io_u(td, io_u);
- return ret;
-}
+ if (ret == FIO_Q_COMPLETED) {
+ if (!ddir_sync(io_u->ddir)) {
+ io_u_mark_depth(td, 1);
+ td->ts.total_io_u[io_u->ddir]++;
+ }
+ } else if (ret == FIO_Q_QUEUED) {
+ int r;
-static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- struct sgio_data *sd = td->io_data;
- int nr_blocks, lba;
+ if (!ddir_sync(io_u->ddir)) {
+ td->io_u_queued++;
+ td->ts.total_io_u[io_u->ddir]++;
+ }
- if (io_u->buflen & (sd->bs - 1)) {
- log_err("read/write not sector aligned\n");
- return EINVAL;
+ if (td->io_u_queued >= td->o.iodepth_batch) {
+ r = td_io_commit(td);
+ if (r < 0)
+ return r;
+ }
}
- sgio_hdr_init(sd, hdr, io_u, 1);
+ if ((td->io_ops->flags & FIO_SYNCIO) == 0) {
+ if (fio_fill_issue_time(td))
+ fio_gettime(&io_u->issue_time, NULL);
- if (io_u->ddir == DDIR_READ) {
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->cmdp[0] = 0x28;
- } else {
- hdr->dxfer_direction = SG_DXFER_TO_DEV;
- hdr->cmdp[0] = 0x2a;
+ /*
+ * only used for iolog
+ */
+ if (td->o.read_iolog_file)
+ memcpy(&td->last_issue, &io_u->issue_time,
+ sizeof(struct timeval));
}
- nr_blocks = io_u->buflen / sd->bs;
- lba = io_u->offset / sd->bs;
- hdr->cmdp[2] = (lba >> 24) & 0xff;
- hdr->cmdp[3] = (lba >> 16) & 0xff;
- hdr->cmdp[4] = (lba >> 8) & 0xff;
- hdr->cmdp[5] = lba & 0xff;
- hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
- hdr->cmdp[8] = nr_blocks & 0xff;
- return 0;
+ return ret;
}
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+int td_io_init(struct thread_data *td)
{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
+ int ret = 0;
- ret = fio_sgio_doio(td, io_u, 0);
-
- if (ret < 0)
- io_u->error = errno;
- else if (hdr->status) {
- io_u->resid = hdr->resid;
- io_u->error = EIO;
+ if (td->io_ops->init) {
+ ret = td->io_ops->init(td);
+ if (ret && td->o.iodepth > 1) {
+ log_err("fio: io engine init failed. Perhaps try"
+ " reducing io depth?\n");
+ }
}
- return io_u->error;
-}
-
-static struct io_u *fio_sgio_event(struct thread_data *td, int event)
-{
- struct sgio_data *sd = td->io_data;
-
- return sd->events[event];
+ return ret;
}
-static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
+int td_io_commit(struct thread_data *td)
{
- struct sgio_data *sd = td->io_data;
- struct io_u *io_u;
- struct sg_io_hdr *hdr;
- unsigned char buf[8];
int ret;
- io_u = __get_io_u(td);
- assert(io_u);
+ dprint(FD_IO, "calling ->commit(), depth %d\n", td->cur_depth);
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- memset(buf, 0, sizeof(buf));
+ if (!td->cur_depth || !td->io_u_queued)
+ return 0;
- hdr->cmdp[0] = 0x25;
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->dxferp = buf;
- hdr->dxfer_len = sizeof(buf);
+ io_u_mark_depth(td, td->io_u_queued);
+ td->io_u_queued = 0;
- ret = fio_sgio_doio(td, io_u, 1);
- if (ret) {
- put_io_u(td, io_u);
- return ret;
+ if (td->io_ops->commit) {
+ ret = td->io_ops->commit(td);
+ if (ret)
+ td_verror(td, -ret, "io commit");
}
- *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
- put_io_u(td, io_u);
return 0;
}
-int fio_sgio_init(struct thread_data *td)
+int td_io_open_file(struct thread_data *td, struct fio_file *f)
{
- struct sgio_data *sd;
- unsigned int bs;
- int ret;
-
- sd = malloc(sizeof(*sd));
- sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
- sd->events = malloc(td->iodepth * sizeof(struct io_u *));
- td->io_data = sd;
+ assert(!fio_file_open(f));
+ assert(f->fd == -1);
- if (td->filetype == FIO_TYPE_BD) {
- if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
- td_verror(td, errno);
- return 1;
+ if (td->io_ops->open_file(td, f)) {
+ if (td->error == EINVAL && td->o.odirect)
+ log_err("fio: destination does not support O_DIRECT\n");
+ if (td->error == EMFILE) {
+ log_err("fio: try reducing/setting openfiles (failed"
+ " at %u of %u)\n", td->nr_open_files,
+ td->o.nr_files);
}
- } else if (td->filetype == FIO_TYPE_CHAR) {
- int version;
- if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
- td_verror(td, errno);
- return 1;
- }
-
- ret = fio_sgio_get_bs(td, &bs);
- if (ret)
- return ret;
- } else {
- log_err("ioengine sgio only works on block devices\n");
+ assert(f->fd == -1);
+ assert(!fio_file_open(f));
return 1;
}
- sd->bs = bs;
-
- td->io_prep = fio_sgio_prep;
- td->io_queue = fio_sgio_queue;
-
- if (td->filetype == FIO_TYPE_BD)
- td->io_getevents = fio_syncio_getevents;
- else
- td->io_getevents = fio_sgio_getevents;
-
- td->io_event = fio_sgio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_sgio_sync;
-
- /*
- * we want to do it, regardless of whether odirect is set or not
- */
- td->override_sync = 1;
- return 0;
-}
-
-#else /* FIO_HAVE_SGIO */
+ fio_file_reset(f);
+ fio_file_set_open(f);
+ fio_file_clear_closing(f);
+ disk_util_inc(f->du);
-int fio_sgio_init(struct thread_data *td)
-{
- return EINVAL;
-}
+ td->nr_open_files++;
+ get_file(f);
-#endif /* FIO_HAVE_SGIO */
+ if (f->filetype == FIO_TYPE_PIPE) {
+ if (td_random(td)) {
+ log_err("fio: can't seek on pipes (no random io)\n");
+ goto err;
+ }
+ }
-#ifdef FIO_HAVE_SPLICE
-struct spliceio_data {
- struct io_u *last_io_u;
- int pipe[2];
-};
+ if (td->io_ops->flags & FIO_DISKLESSIO)
+ goto done;
-static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
-{
- struct spliceio_data *sd = td->io_data;
+ if (td->o.invalidate_cache && file_invalidate_cache(td, f))
+ goto err;
- assert(event == 0);
+ if (td->o.fadvise_hint &&
+ (f->filetype == FIO_TYPE_BD || f->filetype == FIO_TYPE_FILE)) {
+ int flags;
- return sd->last_io_u;
-}
+ if (td_random(td))
+ flags = POSIX_FADV_RANDOM;
+ else
+ flags = POSIX_FADV_SEQUENTIAL;
-/*
- * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
- * So just splice the data from the file into the pipe, and use regular
- * read to fill the buffer. Doesn't make a lot of sense, but...
- */
-static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- int ret, ret2, buflen;
- off_t offset;
- void *p;
-
- offset = io_u->offset;
- buflen = io_u->buflen;
- p = io_u->buf;
- while (buflen) {
- int this_len = buflen;
-
- if (this_len > SPLICE_DEF_SIZE)
- this_len = SPLICE_DEF_SIZE;
-
- ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
- if (ret < 0) {
- if (errno == ENODATA || errno == EAGAIN)
- continue;
-
- return errno;
+ if (fadvise(f->fd, f->file_offset, f->io_size, flags) < 0) {
+ td_verror(td, errno, "fadvise");
+ goto err;
}
+ }
- buflen -= ret;
-
- while (ret) {
- ret2 = read(sd->pipe[0], p, ret);
- if (ret2 < 0)
- return errno;
+#ifdef FIO_OS_DIRECTIO
+ /*
+ * Some OS's have a distinct call to mark the file non-buffered,
+ * instead of using O_DIRECT (Solaris)
+ */
+ if (td->o.odirect) {
+ int ret = fio_set_odirect(f->fd);
- ret -= ret2;
- p += ret2;
+ if (ret) {
+ td_verror(td, ret, "fio_set_odirect");
+ goto err;
}
}
+#endif
- return io_u->buflen;
+done:
+ log_file(td, f, FIO_LOG_OPEN_FILE);
+ return 0;
+err:
+ disk_util_dec(f->du);
+ if (td->io_ops->close_file)
+ td->io_ops->close_file(td, f);
+ return 1;
}
-/*
- * For splice writing, we can vmsplice our data buffer directly into a
- * pipe and then splice that to a file.
- */
-static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
+int td_io_close_file(struct thread_data *td, struct fio_file *f)
{
- struct spliceio_data *sd = td->io_data;
- struct iovec iov[1] = {
- {
- .iov_base = io_u->buf,
- .iov_len = io_u->buflen,
- }
- };
- struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
- off_t off = io_u->offset;
- int ret, ret2;
-
- while (iov[0].iov_len) {
- if (poll(&pfd, 1, -1) < 0)
- return errno;
+ if (!fio_file_closing(f))
+ log_file(td, f, FIO_LOG_CLOSE_FILE);
- ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
- if (ret < 0)
- return errno;
+ /*
+ * mark as closing, do real close when last io on it has completed
+ */
+ fio_file_set_closing(f);
- iov[0].iov_len -= ret;
- iov[0].iov_base += ret;
+ disk_util_dec(f->du);
+ unlock_file_all(td, f);
- while (ret) {
- ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
- if (ret2 < 0)
- return errno;
+ return put_file(td, f);
+}
- ret -= ret2;
- }
- }
+int td_io_get_file_size(struct thread_data *td, struct fio_file *f)
+{
+ if (!td->io_ops->get_file_size)
+ return 0;
- return io_u->buflen;
+ return td->io_ops->get_file_size(td, f);
}
-static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
+static int do_sync_file_range(struct thread_data *td, struct fio_file *f)
{
- struct spliceio_data *sd = td->io_data;
- int ret;
+ off64_t offset, nbytes;
- if (io_u->ddir == DDIR_READ)
- ret = fio_splice_read(td, io_u);
- else
- ret = fio_splice_write(td, io_u);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = ENODATA;
- } else
- io_u->error = errno;
- }
+ offset = f->first_write;
+ nbytes = f->last_write - f->first_write;
- if (!io_u->error)
- sd->last_io_u = io_u;
+ if (!nbytes)
+ return 0;
- return io_u->error;
+ return sync_file_range(f->fd, offset, nbytes, td->o.sync_file_range);
}
-static void fio_spliceio_cleanup(struct thread_data *td)
+int do_io_u_sync(struct thread_data *td, struct io_u *io_u)
{
- struct spliceio_data *sd = td->io_data;
-
- if (sd) {
- close(sd->pipe[0]);
- close(sd->pipe[1]);
- free(sd);
- td->io_data = NULL;
- }
-}
+ int ret;
-int fio_spliceio_init(struct thread_data *td)
-{
- struct spliceio_data *sd = malloc(sizeof(*sd));
-
- td->io_queue = fio_spliceio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_spliceio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_spliceio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- if (pipe(sd->pipe) < 0) {
- td_verror(td, errno);
- free(sd);
- return 1;
+ if (io_u->ddir == DDIR_SYNC) {
+ ret = fsync(io_u->file->fd);
+ } else if (io_u->ddir == DDIR_DATASYNC) {
+#ifdef FIO_HAVE_FDATASYNC
+ ret = fdatasync(io_u->file->fd);
+#else
+ ret = io_u->xfer_buflen;
+ io_u->error = EINVAL;
+#endif
+ } else if (io_u->ddir == DDIR_SYNC_FILE_RANGE)
+ ret = do_sync_file_range(td, io_u->file);
+ else {
+ ret = io_u->xfer_buflen;
+ io_u->error = EINVAL;
}
- td->io_data = sd;
- return 0;
-}
-
-#else /* FIO_HAVE_SPLICE */
+ if (ret < 0)
+ io_u->error = errno;
-int fio_spliceio_init(struct thread_data *td)
-{
- return EINVAL;
+ return ret;
}
-
-#endif /* FIO_HAVE_SPLICE */