From f5087211ef52c9c9c9cf9fb45b654878ec9278bd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 5 Dec 2005 19:29:03 +0100
Subject: [PATCH] [PATCH] fio: support for posix aio

---
Review notes (text in this section is ignored when the patch is applied):
 * fio-aio.c: fio_posixaio_cancel() compared the aio_cancel() result with
   a bare 1; it now tests AIO_ALLDONE / AIO_CANCELED by name.
 * fio-aio.c: both init functions check malloc(); fio_libaio_init()
   reports the negative errno returned by io_queue_init() instead of
   errno and frees its allocations on failure.
 * fio-aio.c: the blob hash on its "index" line predates these edits.
 * System header names in #include lines were restored by hand and
   should be checked against the tree.

 Makefile   |   4 +-
 fio-aio.c  | 330 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fio-ini.c  |  42 +++++++--
 fio.c      |  96 +++++++-------------
 fio.h      |  42 ++++++++-
 os-linux.h |  18 ++--
 os.h       |   6 ++
 7 files changed, 450 insertions(+), 88 deletions(-)
 create mode 100644 fio-aio.c

diff --git a/Makefile b/Makefile
index dc6e120..53f21e9 100644
--- a/Makefile
+++ b/Makefile
@@ -8,8 +8,8 @@ all: depend $(PROGS) $(SCRIPTS)
 dops: dops.o
 	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -laio
 
-fio: fio.o fio-ini.o md5.o crc32.o
-	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm
+fio: fio.o fio-aio.o fio-ini.o md5.o crc32.o
+	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt
 
 sgioread: sgioread.o
 	$(CC) $(CFLAGS) -o $@ $(filter %.o,$^)
diff --git a/fio-aio.c b/fio-aio.c
new file mode 100644
index 0000000..040bd08
--- /dev/null
+++ b/fio-aio.c
@@ -0,0 +1,330 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include "fio.h"
+#include "os.h"
+
+#ifdef FIO_HAVE_LIBAIO
+
+/* map a completed io_event back to the io_u whose iocb was submitted */
+#define ev_to_iou(ev)	((struct io_u *) ((unsigned long) (ev)->obj))
+
+/*
+ * per-thread libaio state, hung off td->aio_data
+ */
+struct libaio_data {
+	io_context_t aio_ctx;
+	struct io_event *aio_events;
+};
+
+/*
+ * fill in the iocb for a read (read != 0) or write of the io_u buffer
+ */
+static void fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u,
+			       int read)
+{
+	if (read)
+		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+	else
+		io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+}
+
+/*
+ * return the io_u behind event slot 'event' of the last getevents call
+ */
+static struct io_u *fio_libaio_event(struct thread_data *td, int event)
+{
+	struct libaio_data *ld = td->aio_data;
+
+	return ev_to_iou(ld->aio_events + event);
+}
+
+/*
+ * reap between min and max completions, retrying on -EAGAIN/-EINTR
+ */
+static int fio_libaio_getevents(struct thread_data *td, int min, int max,
+				struct timespec *t)
+{
+	struct libaio_data *ld = td->aio_data;
+	int r;
+
+	do {
+		r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
+		if (r != -EAGAIN && r != -EINTR)
+			break;
+	} while (1);
+
+	return r;
+}
+
+/*
+ * submit one io_u. returns 0 when queued, otherwise the io_submit result
+ */
+static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->aio_data;
+	struct iocb *iocb = &io_u->iocb;
+	int ret;
+
+	do {
+		ret = io_submit(ld->aio_ctx, 1, &iocb);
+		if (ret == 1)
+			return 0;
+		else if (ret == -EAGAIN)
+			usleep(100);
+		else if (ret == -EINTR)
+			continue;
+		else
+			break;
+	} while (1);
+
+	return ret;
+}
+
+/*
+ * try to cancel an in-flight io_u, result is that of io_cancel
+ */
+static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	struct libaio_data *ld = td->aio_data;
+
+	return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
+}
+
+/*
+ * set up the libaio context and event array and hook up the engine ops.
+ * returns 0 on success, 1 on failure (nothing is left allocated then)
+ */
+int fio_libaio_init(struct thread_data *td)
+{
+	struct libaio_data *ld;
+	int err;
+
+	ld = malloc(sizeof(*ld));
+	if (!ld) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	memset(ld, 0, sizeof(*ld));
+
+	ld->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
+	if (!ld->aio_events) {
+		td_verror(td, ENOMEM);
+		free(ld);
+		return 1;
+	}
+
+	/*
+	 * io_queue_init hands back a negative errno, it does not set errno
+	 */
+	err = io_queue_init(td->aio_depth, &ld->aio_ctx);
+	if (err) {
+		td_verror(td, -err);
+		free(ld->aio_events);
+		free(ld);
+		return 1;
+	}
+
+	td->io_prep = fio_libaio_io_prep;
+	td->io_queue = fio_libaio_queue;
+	td->io_getevents = fio_libaio_getevents;
+	td->io_event = fio_libaio_event;
+	td->io_cancel = fio_libaio_cancel;
+
+	td->aio_data = ld;
+	return 0;
+}
+
+/*
+ * tear down what fio_libaio_init set up; a no-op if td->aio_data is unset
+ */
+void fio_libaio_cleanup(struct thread_data *td)
+{
+	struct libaio_data *ld = td->aio_data;
+
+	if (ld) {
+		io_destroy(ld->aio_ctx);
+		free(ld->aio_events);
+		free(ld);
+		td->aio_data = NULL;
+	}
+}
+
+#else /* FIO_HAVE_LIBAIO */
+
+int fio_libaio_init(struct thread_data *td)
+{
+	return EINVAL;
+}
+
+void fio_libaio_cleanup(struct thread_data *td)
+{
+}
+
+#endif /* FIO_HAVE_LIBAIO */
+
+#ifdef FIO_HAVE_POSIXAIO
+
+/*
+ * per-thread posix aio state: io_us reaped by the last getevents call
+ */
+struct posixaio_data {
+	struct io_u **aio_events;
+};
+
+/*
+ * returns 0 if the io_u is no longer in flight (cancelled, or it had
+ * already completed), 1 if it is still pending or aio_cancel failed
+ */
+static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+	int r = aio_cancel(td->fd, &io_u->aiocb);
+
+	if (r == AIO_ALLDONE || r == AIO_CANCELED)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * fill in the aiocb for the io_u and mark it as not yet reaped
+ */
+static void fio_posixaio_prep(struct thread_data *td, struct io_u *io_u,
+			      int read)
+{
+	struct aiocb *aiocb = &io_u->aiocb;
+
+	aiocb->aio_fildes = td->fd;
+	aiocb->aio_buf = io_u->buf;
+	aiocb->aio_nbytes = io_u->buflen;
+	aiocb->aio_offset = io_u->offset;
+
+	if (read)
+		aiocb->aio_lio_opcode = LIO_READ;
+	else
+		aiocb->aio_lio_opcode = LIO_WRITE;
+
+	io_u->seen = 0;
+}
+
+/*
+ * poll the busy list until at least min (and at most max) io_us that
+ * haven't been reported before are done. the timeout 't' is not used
+ */
+static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
+				  struct timespec *t)
+{
+	struct posixaio_data *pd = td->aio_data;
+	struct list_head *entry;
+	int r;
+
+	r = 0;
+restart:
+	list_for_each(entry, &td->io_u_busylist) {
+		struct io_u *io_u = list_entry(entry, struct io_u, list);
+
+		if (io_u->seen)
+			continue;
+
+		if (aio_error(&io_u->aiocb) != EINPROGRESS) {
+			pd->aio_events[r++] = io_u;
+			io_u->seen = 1;
+		}
+
+		if (r >= max)
+			break;
+	}
+
+	if (r >= min)
+		return r;
+
+	/*
+	 * hrmpf, we need to wait for more. we should use aio_suspend, for
+	 * now just sleep a little and recheck status of busy-and-not-seen
+	 */
+	usleep(1000);
+	goto restart;
+}
+
+/*
+ * return the io_u in slot 'event' of the last getevents call
+ */
+static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+{
+	struct posixaio_data *pd = td->aio_data;
+
+	return pd->aio_events[event];
+}
+
+/*
+ * start the prepared io_u. result is that of aio_read/aio_write
+ */
+static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct aiocb *aiocb = &io_u->aiocb;
+
+	if (aiocb->aio_lio_opcode == LIO_READ)
+		return aio_read(aiocb);
+	else
+		return aio_write(aiocb);
+}
+
+/*
+ * allocate the reaped-event array and hook up the engine ops.
+ * returns 0 on success, 1 on failure (nothing is left allocated then)
+ */
+int fio_posixaio_init(struct thread_data *td)
+{
+	struct posixaio_data *pd;
+
+	pd = malloc(sizeof(*pd));
+	if (!pd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	pd->aio_events = malloc(td->aio_depth * sizeof(struct io_u *));
+	if (!pd->aio_events) {
+		td_verror(td, ENOMEM);
+		free(pd);
+		return 1;
+	}
+
+	td->io_prep = fio_posixaio_prep;
+	td->io_queue = fio_posixaio_queue;
+	td->io_getevents = fio_posixaio_getevents;
+	td->io_event = fio_posixaio_event;
+	td->io_cancel = fio_posixaio_cancel;
+
+	td->aio_data = pd;
+	return 0;
+}
+
+/*
+ * free what fio_posixaio_init set up; a no-op if td->aio_data is unset
+ */
+void fio_posixaio_cleanup(struct thread_data *td)
+{
+	struct posixaio_data *pd = td->aio_data;
+
+	if (pd) {
+		free(pd->aio_events);
+		free(pd);
+		td->aio_data = NULL;
+	}
+}
+
+#else /* FIO_HAVE_POSIXAIO */
+
+int fio_posixaio_init(struct thread_data *td)
+{
+	return EINVAL;
+}
+
+void fio_posixaio_cleanup(struct thread_data *td)
+{
+}
+
+#endif /* FIO_HAVE_POSIXAIO */
diff --git a/fio-ini.c b/fio-ini.c
index fc71e2b..36ca1ad 100644
--- a/fio-ini.c
+++ b/fio-ini.c
@@ -14,6 +14,7 @@
 #define DEF_TIMEOUT		(0)
 #define DEF_RATE_CYCLE		(1000)
 #define DEF_ODIRECT		(1)
+#define DEF_IO_ENGINE		(FIO_SYNCIO)
 #define DEF_SEQUENTIAL		(1)
 #define DEF_RAND_REPEAT		(1)
 #define DEF_OVERWRITE		(1)
@@ -119,7 +120,7 @@ static struct thread_data *get_new_job(int global, struct thread_data *parent)
 	td->fsync_blocks = parent->fsync_blocks;
 	td->start_delay = parent->start_delay;
 	td->timeout = parent->timeout;
-	td->use_aio = parent->use_aio;
+	td->io_engine = parent->io_engine;
 	td->create_file = parent->create_file;
 	td->overwrite = parent->overwrite;
 	td->invalidate_cache = parent->invalidate_cache;
@@ -160,6 +161,19 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 	if (td == &def_thread)
 		return 0;
 
+#ifndef FIO_HAVE_LIBAIO
+	if (td->io_engine == FIO_LIBAIO) {
+		fprintf(stderr, "Linux libaio not available\n");
+		return 1;
+	}
+#endif
+#ifndef FIO_HAVE_POSIXAIO
+	if (td->io_engine == FIO_POSIXAIO) {
+		fprintf(stderr, "posix aio not available\n");
+		return 1;
+	}
+#endif
+
 	td->filetype = FIO_TYPE_FILE;
 	if (!stat(jobname, &sb) && S_ISBLK(sb.st_mode))
 		td->filetype = FIO_TYPE_BD;
@@ -181,7 +195,7 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 
 	run_str[td->thread_number - 1] = 'P';
 
-	if (td->use_aio) {
+	if (td->io_engine != FIO_SYNCIO) {
 		if (!td->aio_depth)
 			td->aio_depth = 1;
 		if (td->use_mmap)
@@ -208,7 +222,7 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 	if (write_bw_log)
 		setup_log(&td->bw_log);
 
-	printf("Client%d (g=%d): rw=%d, prio=%d/%d, seq=%d, odir=%d, mmap=%d, bs=%d-%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, td->groupid, td->ddir, prioclass, prio, td->sequential, td->odirect, td->use_mmap, td->min_bs, td->max_bs, td->rate, td->use_aio, td->aio_depth);
+	printf("Client%d (g=%d): rw=%d, prio=%d/%d, seq=%d, odir=%d, mmap=%d, bs=%d-%d, rate=%d, ioengine=%d, aio_depth=%d\n", td->thread_number, td->groupid, td->ddir, prioclass, prio, td->sequential, td->odirect, td->use_mmap, td->min_bs, td->max_bs, td->rate, td->io_engine, td->aio_depth);
 
 	/*
 	 * recurse add identical jobs, clear numjobs and stonewall options
@@ -509,6 +523,24 @@ static int str_mem_cb(struct thread_data *td, char *mem)
 	return 1;
 }
 
+static int str_ioengine_cb(struct thread_data *td, char *str)
+{
+	if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3)) {
+		td->io_engine = FIO_LIBAIO;
+		return 0;
+	} else if (!strncmp(str, "posixaio", 8)) {
+		td->io_engine = FIO_POSIXAIO;
+		return 0;
+	} else if (!strncmp(str, "sync", 4)) {
+		td->io_engine = FIO_SYNCIO;
+		return 0;
+	}
+
+	fprintf(stderr, "bad ioengine type: %s\n", str);
+	return 1;
+}
+
+
 int parse_jobs_ini(char *file)
 {
 	unsigned int prioclass, prio, cpu, global;
@@ -681,8 +713,7 @@ int parse_jobs_ini(char *file)
 				fgetpos(f, &off);
 				continue;
 			}
-			if (!check_strset(p, "aio")) {
-				td->use_aio = 1;
+			if (!check_str(p, "ioengine", str_ioengine_cb, td)) {
 				fgetpos(f, &off);
 				continue;
 			}
@@ -737,6 +768,7 @@ static int fill_def_thread(void)
 	def_thread.bs = DEF_BS;
 	def_thread.min_bs = -1;
 	def_thread.max_bs = -1;
+	def_thread.io_engine = DEF_IO_ENGINE;
 	def_thread.odirect = DEF_ODIRECT;
 	def_thread.ratecycle = DEF_RATE_CYCLE;
 	def_thread.sequential = DEF_SEQUENTIAL;
diff --git a/fio.c b/fio.c
index 1d12458..6e96d7b 100644
--- a/fio.c
+++ b/fio.c
@@ -68,21 +68,6 @@ enum {
 	TD_REAPED,
 };
 
-/*
- * The io unit
- */
-struct io_u {
-	struct iocb iocb;
-	struct timeval start_time;
-	struct timeval issue_time;
-
-	char *buf;
-	unsigned int buflen;
-	unsigned long long offset;
-
-	struct list_head list;
-};
-
 #define should_fsync(td)	(td_write(td) && !(td)->odirect)
 
 static sem_t startup_sem;
@@ -606,6 +591,11 @@ static struct io_u *get_io_u(struct thread_data *td)
 	if (io_u->buflen + io_u->offset > td->file_size)
 		io_u->buflen = td->file_size - io_u->offset;
 
+	if (!io_u->buflen) {
+		put_io_u(td, io_u);
+		return NULL;
+	}
+
 	if (!td->sequential)
 		mark_random_map(td, io_u);
 
@@ -614,12 +604,8 @@ static struct io_u *get_io_u(struct thread_data *td)
 	if (td->verify != VERIFY_NONE)
 		populate_io_u(td, io_u);
 
-	if (td->use_aio) {
-		if (td_read(td))
-			io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-		else
-			io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-	}
+	if (td->io_prep)
+		td->io_prep(td, io_u, td_read(td));
 
 	gettimeofday(&io_u->start_time, NULL);
 	return io_u;
@@ -877,39 +863,15 @@ static void do_sync_io(struct thread_data *td)
 static int io_u_getevents(struct thread_data *td, int min, int max,
 			  struct timespec *t)
 {
-	int r;
-
-	do {
-		r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
-		if (r != -EAGAIN && r != -EINTR)
-			break;
-	} while (1);
-
-	return r;
+	return td->io_getevents(td, min, max, t);
 }
 
 static int io_u_queue(struct thread_data *td, struct io_u *io_u)
 {
-	struct iocb *iocb = &io_u->iocb;
-	int ret;
-
-	do {
-		ret = io_submit(td->aio_ctx, 1, &iocb);
-		if (ret == 1)
-			return 0;
-		else if (ret == -EAGAIN)
-			usleep(100);
-		else if (ret == -EINTR)
-			continue;
-		else
-			break;
-	} while (1);
-
-	return ret;
+	return td->io_queue(td, io_u);
 }
 
 #define iocb_time(iocb)	((unsigned long) (iocb)->data)
-#define ev_to_iou(ev)	(struct io_u *) ((unsigned long) (ev)->obj)
 
 static int ios_completed(struct thread_data *td, int nr)
 {
@@ -921,7 +883,7 @@ static int ios_completed(struct thread_data *td, int nr)
 	gettimeofday(&e, NULL);
 
 	for (i = 0, bytes_done = 0; i < nr; i++) {
-		io_u = ev_to_iou(td->aio_events + i);
+		io_u = td->io_event(td, i);
 
 		td->io_blocks++;
 		td->io_bytes += io_u->buflen;
@@ -962,7 +924,7 @@ static void cleanup_pending_aio(struct thread_data *td)
 	list_for_each_safe(entry, n, &td->io_u_busylist) {
 		io_u = list_entry(entry, struct io_u, list);
 
-		r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
+		r = td->io_cancel(td, io_u);
 		if (!r)
 			put_io_u(td, io_u);
 	}
@@ -1013,7 +975,8 @@ static void do_async_verify(struct thread_data *td)
 			break;
 		}
 
-		io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+		td->io_prep(td, io_u, 1);
+
 		ret = io_u_queue(td, io_u);
 		if (ret) {
 			put_io_u(td, io_u);
@@ -1035,7 +998,7 @@ static void do_async_verify(struct thread_data *td)
 			break;
 		}
 
-		v_io_u = ev_to_iou(td->aio_events);
+		v_io_u = td->io_event(td, 0);
 
 		td->cur_off = v_io_u->offset + v_io_u->buflen;
 
@@ -1137,21 +1100,22 @@ static void do_async_io(struct thread_data *td)
 
 static void cleanup_aio(struct thread_data *td)
 {
-	io_destroy(td->aio_ctx);
-
-	if (td->aio_events)
-		free(td->aio_events);
+	if (td->io_engine == FIO_LIBAIO)
+		fio_libaio_cleanup(td);
+	else if (td->io_engine == FIO_POSIXAIO)
+		fio_posixaio_cleanup(td);
 }
 
 static int init_aio(struct thread_data *td)
 {
-	if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
-		td_verror(td, errno);
+	if (td->io_engine == FIO_LIBAIO)
+		return fio_libaio_init(td);
+	else if (td->io_engine == FIO_POSIXAIO)
+		return fio_posixaio_init(td);
+	else {
+		fprintf(stderr, "bad io_engine %d\n", td->io_engine);
 		return 1;
 	}
-
-	td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
-	return 0;
 }
 
 static void cleanup_io_u(struct thread_data *td)
@@ -1187,7 +1151,7 @@ static int init_io_u(struct thread_data *td)
 	int i, max_units;
 	char *p;
 
-	if (!td->use_aio)
+	if (td->io_engine == FIO_SYNCIO)
 		max_units = 1;
 	else
 		max_units = td->aio_depth;
@@ -1730,7 +1694,7 @@ static void disk_util_timer_arm(void)
 
 static void clear_io_state(struct thread_data *td)
 {
-	if (!td->use_aio)
+	if (td->io_engine == FIO_SYNCIO)
 		lseek(td->fd, SEEK_SET, 0);
 
 	td->cur_off = 0;
@@ -1773,7 +1737,7 @@ static void *thread_main(void *data)
 		goto err;
 	}
 
-	if (td->use_aio && init_aio(td))
+	if ((td->io_engine != FIO_SYNCIO) && init_aio(td))
 		goto err;
 
 	if (td->ioprio) {
@@ -1805,7 +1769,7 @@ static void *thread_main(void *data)
 		clear_io_state(td);
 		prune_io_piece_log(td);
 
-		if (!td->use_aio)
+		if (td->io_engine == FIO_SYNCIO)
 			do_sync_io(td);
 		else
 			do_async_io(td);
@@ -1821,7 +1785,7 @@ static void *thread_main(void *data)
 
 		clear_io_state(td);
 
-		if (!td->use_aio)
+		if (td->io_engine == FIO_SYNCIO)
 			do_sync_verify(td);
 		else
 			do_async_verify(td);
@@ -1847,7 +1811,7 @@ err:
 	}
 	if (td->mmap)
 		munmap(td->mmap, td->file_size);
-	if (td->use_aio)
+	if (td->io_engine != FIO_SYNCIO)
 		cleanup_aio(td);
 	cleanup_io_u(td);
 	if (ret) {
diff --git a/fio.h b/fio.h
index 10d41fe..930ca2e 100644
--- a/fio.h
+++ b/fio.h
@@ -2,7 +2,6 @@
 #define FIO_H
 
 #include <sched.h>
-#include <libaio.h>
 #include <limits.h>
 #include <sys/time.h>
 #include <sys/resource.h>
@@ -12,6 +11,7 @@
 #include "md5.h"
 #include "crc32.h"
 #include "arch.h"
+#include "os.h"
 
 struct io_stat {
 	unsigned long val;
@@ -38,6 +38,30 @@ struct io_piece {
 	unsigned int len;
 };
 
+/*
+ * The io unit
+ */
+struct io_u {
+	union {
+#ifdef FIO_HAVE_LIBAIO
+		struct iocb iocb;
+#endif
+#ifdef FIO_HAVE_POSIXAIO
+		struct aiocb aiocb;
+#endif
+	};
+	struct timeval start_time;
+	struct timeval issue_time;
+
+	char *buf;
+	unsigned int buflen;
+	unsigned long long offset;
+
+	unsigned char seen;
+
+	struct list_head list;
+};
+
 #define FIO_HDR_MAGIC	0xf00baaef
 
 enum {
@@ -91,7 +115,7 @@ struct thread_data {
 	unsigned int fsync_blocks;
 	unsigned int start_delay;
 	unsigned int timeout;
-	unsigned int use_aio;
+	unsigned int io_engine;
 	unsigned int create_file;
 	unsigned int overwrite;
 	unsigned int invalidate_cache;
@@ -117,9 +141,13 @@ struct thread_data {
 
 	unsigned long long cur_off;
 
-	io_context_t aio_ctx;
+	void *aio_data;
+	void (*io_prep)(struct thread_data *, struct io_u *, int);
+	int (*io_queue)(struct thread_data *, struct io_u *);
+	int (*io_getevents)(struct thread_data *, int, int, struct timespec *);
+	struct io_u *(*io_event)(struct thread_data *, int);
+	int (*io_cancel)(struct thread_data *, struct io_u *);
 	unsigned int aio_depth;
-	struct io_event *aio_events;
 	unsigned int cur_depth;
 
 	struct list_head io_u_freelist;
@@ -217,6 +245,12 @@ enum {
 	FIO_TYPE_BD,
 };
 
+enum {
+	FIO_SYNCIO = 0,
+	FIO_LIBAIO,
+	FIO_POSIXAIO,
+};
+
 #define td_read(td)		((td)->ddir == DDIR_READ)
 #define td_write(td)		((td)->ddir == DDIR_WRITE)
 
diff --git a/os-linux.h b/os-linux.h
index 1a0b08e..c7c0aa1 100644
--- a/os-linux.h
+++ b/os-linux.h
@@ -1,20 +1,16 @@
 #ifndef FIO_OS_LINUX_H
 #define FIO_OS_LINUX_H
 
-#define FIO_HAVE_LIBAIO		(1)
-#define FIO_HAVE_POSIXAIO	(1)
-#define FIO_HAVE_FADVISE	(1)
+#include <libaio.h>
+#include <aio.h>
+
+#define FIO_HAVE_LIBAIO
+#define FIO_HAVE_POSIXAIO
+#define FIO_HAVE_FADVISE
 
 /*
  * we want fadvise64 really, but it's so tangled... later
  */
-static int fadvise(int fd, loff_t offset, size_t len, int advice)
-{
-#if 0
-	return syscall(__NR_fadvise64, fd, offset, offset >> 32, len, advice);
-#else
-	return posix_fadvise(fd, (off_t) offset, len, advice);
-#endif
-}
+#define fadvise(fd, off, len, advice)	posix_fadvise((fd), (off_t)(off), (len), (advice))
 
 #endif
diff --git a/os.h b/os.h
index e7cb53d..069f9c8 100644
--- a/os.h
+++ b/os.h
@@ -18,4 +18,10 @@ static int fadvise(int fd, loff_t offset, size_t len, int advice)
 #define POSIX_FADV_RANDOM	(0)
 #endif /* FIO_HAVE_FADVISE */
 
+struct thread_data;
+extern int fio_libaio_init(struct thread_data *);
+extern void fio_libaio_cleanup(struct thread_data *);
+extern int fio_posixaio_init(struct thread_data *);
+extern void fio_posixaio_cleanup(struct thread_data *);
+
 #endif
-- 
2.25.1