X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=ioengines.c;h=aa4ccd2755c96f843d69333448795b0b16061723;hp=984c01ac7e6442a99868ee03388a684c654318e8;hb=01bf5128d0581e267383f280c6a1dcd26517240f;hpb=391ba6ac169ad7deaf3bb3840046581f45d755e1 diff --git a/ioengines.c b/ioengines.c index 984c01ac..aa4ccd27 100644 --- a/ioengines.c +++ b/ioengines.c @@ -9,73 +9,65 @@ * generic io engine that could be used for other projects. * */ -#include #include #include #include #include +#include #include #include "fio.h" +#include "diskutil.h" +#include "zbd.h" -static LIST_HEAD(engine_list); +static FLIST_HEAD(engine_list); -static int check_engine_ops(struct ioengine_ops *ops) +static bool check_engine_ops(struct ioengine_ops *ops) { if (ops->version != FIO_IOOPS_VERSION) { log_err("bad ioops version %d (want %d)\n", ops->version, FIO_IOOPS_VERSION); - return 1; + return true; } if (!ops->queue) { log_err("%s: no queue handler\n", ops->name); - return 1; + return true; } /* * sync engines only need a ->queue() */ if (ops->flags & FIO_SYNCIO) - return 0; + return false; - if (!ops->event) { - log_err("%s: no event handler\n", ops->name); - return 1; - } - if (!ops->getevents) { - log_err("%s: no getevents handler\n", ops->name); - return 1; - } - if (!ops->queue) { - log_err("%s: no queue handler\n", ops->name); - return 1; + if (!ops->event || !ops->getevents) { + log_err("%s: no event/getevents handler\n", ops->name); + return true; } - return 0; + return false; } void unregister_ioengine(struct ioengine_ops *ops) { dprint(FD_IO, "ioengine %s unregistered\n", ops->name); - list_del(&ops->list); - INIT_LIST_HEAD(&ops->list); + flist_del_init(&ops->list); } void register_ioengine(struct ioengine_ops *ops) { dprint(FD_IO, "ioengine %s registered\n", ops->name); - INIT_LIST_HEAD(&ops->list); - list_add_tail(&ops->list, &engine_list); + flist_add_tail(&ops->list, &engine_list); } static struct ioengine_ops *find_ioengine(const char *name) { struct ioengine_ops *ops; - struct list_head *entry; + struct flist_head *entry; - list_for_each(entry, &engine_list) { - ops = list_entry(entry, struct ioengine_ops, list); + flist_for_each(entry, &engine_list) { + ops = flist_entry(entry, struct ioengine_ops, list); if (!strcmp(name, ops->name)) return ops; } @@ -102,36 +94,79 @@ static struct ioengine_ops *dlopen_ioengine(struct thread_data *td, * Unlike the included modules, external engines should have a * non-static ioengine structure that we can reference. */ - ops = dlsym(dlhandle, "ioengine"); + ops = dlsym(dlhandle, engine_lib); + if (!ops) + ops = dlsym(dlhandle, "ioengine"); + + /* + * For some external engines (like C++ ones) it is not that trivial + * to provide a non-static ionengine structure that we can reference. + * Instead we call a method which allocates the required ioengine + * structure. + */ + if (!ops) { + get_ioengine_t get_ioengine = dlsym(dlhandle, "get_ioengine"); + + if (get_ioengine) + get_ioengine(&ops); + } + if (!ops) { td_vmsg(td, -1, dlerror(), "dlsym"); dlclose(dlhandle); return NULL; } - ops->dlhandle = dlhandle; + td->io_ops_dlhandle = dlhandle; return ops; } -struct ioengine_ops *load_ioengine(struct thread_data *td, const char *name) +static struct ioengine_ops *__load_ioengine(const char *name) { - struct ioengine_ops *ops, *ret; - char engine[16]; - - dprint(FD_IO, "load ioengine %s\n", name); + char engine[64]; + engine[sizeof(engine) - 1] = '\0'; strncpy(engine, name, sizeof(engine) - 1); /* * linux libaio has alias names, so convert to what we want */ - if (!strncmp(engine, "linuxaio", 8) || !strncmp(engine, "aio", 3)) + if (!strncmp(engine, "linuxaio", 8)) { + dprint(FD_IO, "converting ioengine name: %s -> libaio\n", name); strcpy(engine, "libaio"); + } + + dprint(FD_IO, "load ioengine %s\n", engine); + return find_ioengine(engine); +} + +struct ioengine_ops *load_ioengine(struct thread_data *td) +{ + struct ioengine_ops *ops = NULL; + const char *name; + + /* + * Use ->ioengine_so_path if an external ioengine path is specified. + * In this case, ->ioengine is "external" which also means the prefix + * for external ioengines "external:" is properly used. + */ + name = td->o.ioengine_so_path ?: td->o.ioengine; - ops = find_ioengine(engine); + /* + * Try to load ->ioengine first, and if failed try to dlopen(3) either + * ->ioengine or ->ioengine_so_path. This is redundant for an external + * ioengine with prefix, and also leaves the possibility of unexpected + * behavior (e.g. if the "external" ioengine exists), but we do this + * so as not to break job files not using the prefix. + */ + ops = __load_ioengine(td->o.ioengine); if (!ops) ops = dlopen_ioengine(td, name); + /* + * If ops is NULL, we failed to load ->ioengine, and also failed to + * dlopen(3) either ->ioengine or ->ioengine_so_path as a path. + */ if (!ops) { log_err("fio: engine %s not loadable\n", name); return NULL; @@ -143,25 +178,40 @@ struct ioengine_ops *load_ioengine(struct thread_data *td, const char *name) if (check_engine_ops(ops)) return NULL; - ret = malloc(sizeof(*ret)); - memcpy(ret, ops, sizeof(*ret)); - ret->data = NULL; + return ops; +} - return ret; +/* + * For cleaning up an ioengine which never made it to init(). + */ +void free_ioengine(struct thread_data *td) +{ + dprint(FD_IO, "free ioengine %s\n", td->io_ops->name); + + if (td->eo && td->io_ops->options) { + options_free(td->io_ops->options, td->eo); + free(td->eo); + td->eo = NULL; + } + + if (td->io_ops_dlhandle) { + dlclose(td->io_ops_dlhandle); + td->io_ops_dlhandle = NULL; + } + + td->io_ops = NULL; } void close_ioengine(struct thread_data *td) { dprint(FD_IO, "close ioengine %s\n", td->io_ops->name); - if (td->io_ops->cleanup) + if (td->io_ops->cleanup) { td->io_ops->cleanup(td); + td->io_ops_data = NULL; + } - if (td->io_ops->dlhandle) - dlclose(td->io_ops->dlhandle); - - free(td->io_ops); - td->io_ops = NULL; + free_ioengine(td); } int td_io_prep(struct thread_data *td, struct io_u *io_u) @@ -174,7 +224,8 @@ int td_io_prep(struct thread_data *td, struct io_u *io_u) if (td->io_ops->prep) { int ret = td->io_ops->prep(td, io_u); - dprint(FD_IO, "->prep(%p)=%d\n", io_u, ret); + dprint(FD_IO, "prep: io_u %p: ret=%d\n", io_u, ret); + if (ret) unlock_file(td, io_u->file); return ret; @@ -184,86 +235,174 @@ int td_io_prep(struct thread_data *td, struct io_u *io_u) } int td_io_getevents(struct thread_data *td, unsigned int min, unsigned int max, - struct timespec *t) + const struct timespec *t) { int r = 0; + /* + * For ioengine=rdma one side operation RDMA_WRITE or RDMA_READ, + * server side gets a message from the client + * side that the task is finished, and + * td->done is set to 1 after td_io_commit(). In this case, + * there is no need to reap complete event in server side. + */ + if (td->done) + return 0; + if (min > 0 && td->io_ops->commit) { r = td->io_ops->commit(td); if (r < 0) goto out; } + if (max > td->cur_depth) + max = td->cur_depth; + if (min > max) + max = min; r = 0; - if (td->io_ops->getevents) + if (max && td->io_ops->getevents) r = td->io_ops->getevents(td, min, max, t); out: + if (r >= 0) { + /* + * Reflect that our submitted requests were retrieved with + * whatever OS async calls are in the underlying engine. + */ + td->io_u_in_flight -= r; + io_u_mark_complete(td, r); + } else + td_verror(td, r, "get_events"); + dprint(FD_IO, "getevents: %d\n", r); return r; } -int td_io_queue(struct thread_data *td, struct io_u *io_u) +enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) { - int ret; + const enum fio_ddir ddir = acct_ddir(io_u); + unsigned long long buflen = io_u->xfer_buflen; + enum fio_q_status ret; dprint_io_u(io_u, "queue"); fio_ro_check(td, io_u); assert((io_u->flags & IO_U_F_FLIGHT) == 0); - io_u->flags |= IO_U_F_FLIGHT; + io_u_set(td, io_u, IO_U_F_FLIGHT); + + /* + * If overlap checking was enabled in offload mode we + * can release this lock that was acquired when we + * started the overlap check because the IO_U_F_FLIGHT + * flag is now set + */ + if (td_offload_overlap(td)) + pthread_mutex_unlock(&overlap_check); - assert(io_u->file->flags & FIO_FILE_OPEN); + assert(fio_file_open(io_u->file)); + + /* + * If using a write iolog, store this entry. + */ + log_io_u(td, io_u); io_u->error = 0; io_u->resid = 0; - if (td->io_ops->flags & FIO_SYNCIO) { - fio_gettime(&io_u->issue_time, NULL); - memcpy(&td->last_issue, &io_u->issue_time, - sizeof(struct timeval)); + if (td_ioengine_flagged(td, FIO_SYNCIO) || + (td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) && + io_u->ddir == DDIR_TRIM)) { + if (fio_fill_issue_time(td)) + fio_gettime(&io_u->issue_time, NULL); /* - * for a sync engine, set the timeout upfront + * only used for iolog */ - if (mtime_since(&td->timeout_end, &io_u->issue_time) - < IO_U_TIMEOUT) - io_u_set_timeout(td); + if (td->o.read_iolog_file) + memcpy(&td->last_issue, &io_u->issue_time, + sizeof(io_u->issue_time)); } - if (io_u->ddir != DDIR_SYNC) - td->io_issues[io_u->ddir]++; + if (ddir_rw(ddir)) { + if (!(io_u->flags & IO_U_F_VER_LIST)) { + td->io_issues[ddir]++; + td->io_issue_bytes[ddir] += buflen; + } + td->rate_io_issue_bytes[ddir] += buflen; + } ret = td->io_ops->queue(td, io_u); + zbd_queue_io_u(io_u, ret); unlock_file(td, io_u->file); - if (ret != FIO_Q_BUSY) - io_u_mark_depth(td, io_u, 1); + if (ret == FIO_Q_BUSY && ddir_rw(ddir)) { + td->io_issues[ddir]--; + td->io_issue_bytes[ddir] -= buflen; + td->rate_io_issue_bytes[ddir] -= buflen; + io_u_clear(td, io_u, IO_U_F_FLIGHT); + } - if (ret == FIO_Q_QUEUED) { - int r; + /* + * If an error was seen and the io engine didn't propagate it + * back to 'td', do so. + */ + if (io_u->error && !td->error) + td_verror(td, io_u->error, "td_io_queue"); - td->io_u_queued++; - if (td->io_u_queued > td->o.iodepth_batch) { - r = td_io_commit(td); - if (r < 0) - return r; + /* + * Add warning for O_DIRECT so that users have an easier time + * spotting potentially bad alignment. If this triggers for the first + * IO, then it's likely an alignment problem or because the host fs + * does not support O_DIRECT + */ + if (io_u->error == EINVAL && td->io_issues[io_u->ddir & 1] == 1 && + td->o.odirect) { + + log_info("fio: first direct IO errored. File system may not " + "support direct IO, or iomem_align= is bad, or " + "invalid block size. Try setting direct=0.\n"); + } + + if (zbd_unaligned_write(io_u->error) && + td->io_issues[io_u->ddir & 1] == 1 && + td->o.zone_mode != ZONE_MODE_ZBD) { + log_info("fio: first I/O failed. If %s is a zoned block device, consider --zonemode=zbd\n", + io_u->file->file_name); + } + + if (!td->io_ops->commit) { + io_u_mark_submit(td, 1); + io_u_mark_complete(td, 1); + zbd_put_io_u(io_u); + } + + if (ret == FIO_Q_COMPLETED) { + if (ddir_rw(io_u->ddir) || ddir_sync(io_u->ddir)) { + io_u_mark_depth(td, 1); + td->ts.total_io_u[io_u->ddir]++; } + } else if (ret == FIO_Q_QUEUED) { + td->io_u_queued++; + + if (ddir_rw(io_u->ddir) || ddir_sync(io_u->ddir)) + td->ts.total_io_u[io_u->ddir]++; + + if (td->io_u_queued >= td->o.iodepth_batch) + td_io_commit(td); } - if ((td->io_ops->flags & FIO_SYNCIO) == 0) { - fio_gettime(&io_u->issue_time, NULL); - memcpy(&td->last_issue, &io_u->issue_time, - sizeof(struct timeval)); + if (!td_ioengine_flagged(td, FIO_SYNCIO) && + (!td_ioengine_flagged(td, FIO_ASYNCIO_SYNC_TRIM) || + io_u->ddir != DDIR_TRIM)) { + if (fio_fill_issue_time(td)) + fio_gettime(&io_u->issue_time, NULL); /* - * async engine, set the timeout here + * only used for iolog */ - if (ret == FIO_Q_QUEUED && - (mtime_since(&td->timeout_end, &io_u->issue_time) - < IO_U_TIMEOUT)) { - io_u_set_timeout(td); - } + if (td->o.read_iolog_file) + memcpy(&td->last_issue, &io_u->issue_time, + sizeof(io_u->issue_time)); } return ret; @@ -275,31 +414,58 @@ int td_io_init(struct thread_data *td) if (td->io_ops->init) { ret = td->io_ops->init(td); - if (ret && td->o.iodepth > 1) { - log_err("fio: io engine init failed. Perhaps try" - " reducing io depth?\n"); - } + if (ret) + log_err("fio: io engine %s init failed.%s\n", + td->io_ops->name, + td->o.iodepth > 1 ? + " Perhaps try reducing io depth?" : ""); + else + td->io_ops_init = 1; + if (!td->error) + td->error = ret; } return ret; } -int td_io_commit(struct thread_data *td) +void td_io_commit(struct thread_data *td) { + int ret; + dprint(FD_IO, "calling ->commit(), depth %d\n", td->cur_depth); - if (!td->cur_depth) - return 0; + if (!td->cur_depth || !td->io_u_queued) + return; - td->io_u_queued = 0; - if (td->io_ops->commit) - return td->io_ops->commit(td); + io_u_mark_depth(td, td->io_u_queued); - return 0; + if (td->io_ops->commit) { + ret = td->io_ops->commit(td); + if (ret) + td_verror(td, -ret, "io commit"); + } + + /* + * Reflect that events were submitted as async IO requests. + */ + td->io_u_in_flight += td->io_u_queued; + td->io_u_queued = 0; } int td_io_open_file(struct thread_data *td, struct fio_file *f) { + if (fio_file_closing(f)) { + /* + * Open translates to undo closing. + */ + fio_file_clear_closing(f); + get_file(f); + return 0; + } + assert(!fio_file_open(f)); + assert(f->fd == -1); + assert(td->io_ops->open_file); + if (td->io_ops->open_file(td, f)) { if (td->error == EINVAL && td->o.odirect) log_err("fio: destination does not support O_DIRECT\n"); @@ -309,9 +475,19 @@ int td_io_open_file(struct thread_data *td, struct fio_file *f) td->o.nr_files); } + assert(f->fd == -1); + assert(!fio_file_open(f)); return 1; } + fio_file_reset(td, f); + fio_file_set_open(f); + fio_file_clear_closing(f); + disk_util_inc(f->du); + + td->nr_open_files++; + get_file(f); + if (f->filetype == FIO_TYPE_PIPE) { if (td_random(td)) { log_err("fio: can't seek on pipes (no random io)\n"); @@ -319,41 +495,67 @@ int td_io_open_file(struct thread_data *td, struct fio_file *f) } } - f->last_free_lookup = 0; - f->last_pos = f->file_offset; - f->flags |= FIO_FILE_OPEN; - f->flags &= ~FIO_FILE_CLOSING; - - if (td->io_ops->flags & FIO_DISKLESSIO) + if (td_ioengine_flagged(td, FIO_DISKLESSIO)) goto done; if (td->o.invalidate_cache && file_invalidate_cache(td, f)) goto err; - if (td->o.fadvise_hint && - (f->filetype == FIO_TYPE_BD || f->filetype == FIO_TYPE_FILE)) { + if (td->o.fadvise_hint != F_ADV_NONE && + (f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_FILE)) { int flags; - if (td_random(td)) + if (td->o.fadvise_hint == F_ADV_TYPE) { + if (td_random(td)) + flags = POSIX_FADV_RANDOM; + else + flags = POSIX_FADV_SEQUENTIAL; + } else if (td->o.fadvise_hint == F_ADV_RANDOM) flags = POSIX_FADV_RANDOM; - else + else if (td->o.fadvise_hint == F_ADV_SEQUENTIAL) flags = POSIX_FADV_SEQUENTIAL; + else { + log_err("fio: unknown fadvise type %d\n", + td->o.fadvise_hint); + flags = POSIX_FADV_NORMAL; + } + + if (posix_fadvise(f->fd, f->file_offset, f->io_size, flags) < 0) { + if (!fio_did_warn(FIO_WARN_FADVISE)) + log_err("fio: fadvise hint failed\n"); + } + } +#ifdef FIO_HAVE_WRITE_HINT + if (fio_option_is_set(&td->o, write_hint) && + (f->filetype == FIO_TYPE_BLOCK || f->filetype == FIO_TYPE_FILE)) { + uint64_t hint = td->o.write_hint; + int cmd; - if (fadvise(f->fd, f->file_offset, f->io_size, flags) < 0) { - td_verror(td, errno, "fadvise"); + /* + * For direct IO, we just need/want to set the hint on + * the file descriptor. For buffered IO, we need to set + * it on the inode. + */ + if (td->o.odirect) + cmd = F_SET_FILE_RW_HINT; + else + cmd = F_SET_RW_HINT; + + if (fcntl(f->fd, cmd, &hint) < 0) { + td_verror(td, errno, "fcntl write hint"); goto err; } } +#endif - if (f->file_map) - memset(f->file_map, 0, f->num_maps * sizeof(long)); + if (td->o.odirect && !OS_O_DIRECT && fio_set_directio(td, f)) + goto err; done: log_file(td, f, FIO_LOG_OPEN_FILE); - td->nr_open_files++; - get_file(f); return 0; err: + disk_util_dec(f->du); if (td->io_ops->close_file) td->io_ops->close_file(td, f); return 1; @@ -361,15 +563,76 @@ err: int td_io_close_file(struct thread_data *td, struct fio_file *f) { - if (!(f->flags & FIO_FILE_CLOSING)) + if (!fio_file_closing(f)) log_file(td, f, FIO_LOG_CLOSE_FILE); /* * mark as closing, do real close when last io on it has completed */ - f->flags |= FIO_FILE_CLOSING; - - unlock_file_all(td, f); + fio_file_set_closing(f); return put_file(td, f); } + +int td_io_unlink_file(struct thread_data *td, struct fio_file *f) +{ + if (td->io_ops->unlink_file) + return td->io_ops->unlink_file(td, f); + else { + int ret; + + ret = unlink(f->file_name); + if (ret < 0) + return errno; + + return 0; + } +} + +int td_io_get_file_size(struct thread_data *td, struct fio_file *f) +{ + if (!td->io_ops->get_file_size) + return 0; + + return td->io_ops->get_file_size(td, f); +} + +int fio_show_ioengine_help(const char *engine) +{ + struct flist_head *entry; + struct thread_data td; + struct ioengine_ops *io_ops; + char *sep; + int ret = 1; + + if (!engine || !*engine) { + log_info("Available IO engines:\n"); + flist_for_each(entry, &engine_list) { + io_ops = flist_entry(entry, struct ioengine_ops, list); + log_info("\t%s\n", io_ops->name); + } + return 0; + } + sep = strchr(engine, ','); + if (sep) { + *sep = 0; + sep++; + } + + memset(&td, 0, sizeof(struct thread_data)); + td.o.ioengine = (char *)engine; + io_ops = load_ioengine(&td); + + if (!io_ops) { + log_info("IO engine %s not found\n", engine); + return 1; + } + + if (io_ops->options) + ret = show_cmd_help(io_ops->options, sep); + else + log_info("IO engine %s has no options\n", io_ops->name); + + free_ioengine(&td); + return ret; +}