From: Jens Axboe
Date: Sat, 27 May 2006 18:24:53 +0000 (+0200)
Subject: [PATCH] Support for splice IO engine
X-Git-Tag: fio-1.4~18
X-Git-Url: https://git.kernel.dk/?p=fio.git;a=commitdiff_plain;h=8756e4d421722eaeb089067aeaaf317d05d53a57;hp=c9fad893e8333682d6ab67a269b0dd74f303c5bc;ds=sidebyside

[PATCH] Support for splice IO engine
---

diff --git a/README b/README
index f6094a62..92cf5c49 100644
--- a/README
+++ b/README
@@ -44,10 +44,10 @@ The format is as follows:
 	size=x		Set file size to x bytes (x string can include k/m/g)
 	ioengine=x	'x' may be: aio/libaio/linuxaio for Linux aio,
 			posixaio for POSIX aio, sync for regular read/write io,
-			mmap for mmap'ed io, or sgio for direct SG_IO io. The
-			latter only works on Linux on SCSI (or SCSI-like
-			devices, such as usb-storage or sata/libata driven)
-			devices.
+			mmap for mmap'ed io, splice for using splice/vmsplice,
+			or sgio for direct SG_IO io. The latter only works on
+			Linux on SCSI (or SCSI-like devices, such as
+			usb-storage or sata/libata driven) devices.
 	iodepth=x	For async io, allow 'x' ios in flight
 	overwrite=x	If 'x', layout a write file first.
 	prio=x		Run io at prio X, 0-7 is the kernel allowed range
diff --git a/arch-ia64.h b/arch-ia64.h
index c9c05085..b3ee0c38 100644
--- a/arch-ia64.h
+++ b/arch-ia64.h
@@ -12,6 +12,12 @@
 #define __NR_fadvise64 1234
 #endif
 
+#ifndef __NR_sys_splice
+#define __NR_sys_splice 1297
+#define __NR_sys_tee 1301
+#define __NR_sys_vmsplice 1302
+#endif
+
 #define nop asm volatile ("hint @pause" ::: "memory");
 
 #define ia64_popcnt(x) \
diff --git a/arch-ppc.h b/arch-ppc.h
index e16f99ba..8bfed597 100644
--- a/arch-ppc.h
+++ b/arch-ppc.h
@@ -12,6 +12,12 @@
 #define __NR_fadvise64 233
 #endif
 
+#ifndef __NR_sys_splice
+#define __NR_sys_splice 283
+#define __NR_sys_tee 284
+#define __NR_sys_vmsplice 285
+#endif
+
 #define nop do { } while (0)
 
 static inline int __ilog2(unsigned long bitmask)
diff --git a/arch-x86.h b/arch-x86.h
index 4e74c0c3..96b46027 100644
--- a/arch-x86.h
+++ b/arch-x86.h
@@ -12,6 +12,12 @@
 #define __NR_fadvise64 250
 #endif
 
+#ifndef __NR_sys_splice
+#define __NR_sys_splice 313
+#define __NR_sys_tee 315
+#define __NR_sys_vmsplice 316
+#endif
+
 #define nop __asm__ __volatile__("rep;nop": : :"memory")
 
 static inline unsigned long ffz(unsigned long bitmask)
diff --git a/arch-x86_64.h b/arch-x86_64.h
index cca66f68..221ab907 100644
--- a/arch-x86_64.h
+++ b/arch-x86_64.h
@@ -12,6 +12,12 @@
 #define __NR_fadvise64 221
 #endif
 
+#ifndef __NR_sys_splice
+#define __NR_sys_splice 275
+#define __NR_sys_tee 276
+#define __NR_sys_vmsplice 278
+#endif
+
 #define nop __asm__ __volatile__("rep;nop": : :"memory")
 
 static inline unsigned long ffz(unsigned long bitmask)
diff --git a/fio-ini.c b/fio-ini.c
index 23810baf..109bc149 100644
--- a/fio-ini.c
+++ b/fio-ini.c
@@ -161,8 +161,7 @@ static void put_job(struct thread_data *td)
 	thread_number--;
 }
 
-static int add_job(struct thread_data *td, const char *jobname, int prioclass,
-		   int prio)
+static int add_job(struct thread_data *td, const char *jobname)
 {
 	char *ddir_str[] = { "read", "write", "randread", "randwrite",
 			     "rw", NULL, "randrw" };
@@ -181,9 +180,6 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 		return 1;
 	}
 #endif
-#ifdef FIO_HAVE_IOPRIO
-	td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
-#endif
 
 	/*
 	 * the def_thread is just for options, it's not a real job
@@ -249,7 +245,7 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 		setup_log(&td->bw_log);
 
 	ddir = td->ddir + (!td->sequential << 1) + (td->iomix << 2);
-	printf("Client%d (g=%d): rw=%s, prio=%d/%d, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], prioclass, prio, td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
+	printf("Client%d (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->thread_number, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
 
 	/*
 	 * recurse add identical jobs, clear numjobs and stonewall options
@@ -266,7 +262,7 @@ static int add_job(struct thread_data *td, const char *jobname, int prioclass,
 		td_new->stonewall = 0;
 		td_new->jobnum = numjobs;
 
-		if (add_job(td_new, jobname, prioclass, prio))
+		if (add_job(td_new, jobname))
 			goto err;
 	}
 	return 0;
@@ -622,6 +618,10 @@ static int str_ioengine_cb(struct thread_data *td, char *str)
 		strcpy(td->io_engine_name, "sgio");
 		td->io_engine = FIO_SGIO;
 		return 0;
+	} else if (!strncmp(str, "splice", 6)) {
+		strcpy(td->io_engine_name, "splice");
+		td->io_engine = FIO_SPLICEIO;
+		return 0;
 	}
 
 	fprintf(stderr, "bad ioengine type: %s\n", str);
@@ -670,9 +670,6 @@ int parse_jobs_ini(char *file)
 		if (!td)
 			return 1;
 
-		prioclass = 2;
-		prio = 4;
-
 		fgetpos(f, &off);
 		while ((p = fgets(string, 4096, f)) != NULL) {
 			if (is_empty_or_comment(p))
@@ -685,6 +682,7 @@ int parse_jobs_ini(char *file)
 				fprintf(stderr, "io priorities not available\n");
 				return 1;
 #endif
+				td->ioprio |= prio;
 				fgetpos(f, &off);
 				continue;
 			}
@@ -693,6 +691,7 @@ int parse_jobs_ini(char *file)
 				fprintf(stderr, "io priorities not available\n");
 				return 1;
 #endif
+				td->ioprio |= prioclass << IOPRIO_CLASS_SHIFT;
 				fgetpos(f, &off);
 				continue;
 			}
@@ -856,7 +855,7 @@ int parse_jobs_ini(char *file)
 		}
 
 		fsetpos(f, &off);
-		if (add_job(td, name, prioclass, prio))
+		if (add_job(td, name))
 			return 1;
 	}
 
diff --git a/fio-io.c b/fio-io.c
index aaf8fe19..f3606e17 100644
--- a/fio-io.c
+++ b/fio-io.c
@@ -751,3 +751,176 @@ int fio_sgio_init(struct thread_data *td)
 }
 
 #endif /* FIO_HAVE_SGIO */
+
+#ifdef FIO_HAVE_SPLICE
+struct spliceio_data {
+	struct io_u *last_io_u;
+	int pipe[2];
+};
+
+static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
+{
+	struct spliceio_data *sd = td->io_data;
+
+	assert(event == 0);
+
+	return sd->last_io_u;
+}
+
+/*
+ * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
+ * So just splice the data from the file into the pipe, and use regular
+ * read to fill the buffer. Doesn't make a lot of sense, but...
+ *
+ * Returns the number of bytes read into io_u->buf (short if splice() hit
+ * end of file), or -errno on failure.
+ */
+static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *sd = td->io_data;
+	loff_t off = io_u->offset;
+	int ret, ret2, buflen;
+	void *p;
+
+	buflen = io_u->buflen;
+	p = io_u->buf;
+	while (buflen) {
+		/* off persists across iterations, so a short splice resumes in place */
+		ret = splice(td->fd, &off, sd->pipe[1], NULL, buflen, 0);
+		if (ret < 0)
+			return -errno;
+		if (!ret)	/* EOF: report a short read rather than spinning */
+			break;
+
+		buflen -= ret;
+
+		while (ret) {
+			ret2 = read(sd->pipe[0], p, ret);
+			if (ret2 < 0)
+				return -errno;
+
+			ret -= ret2;
+			p += ret2;
+		}
+	}
+
+	return io_u->buflen - buflen;
+}
+
+/*
+ * For splice writing, we can vmsplice our data buffer directly into a
+ * pipe and then splice that to a file.
+ *
+ * Returns io_u->buflen once everything is written, or -errno on failure.
+ */
+static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *sd = td->io_data;
+	struct iovec iov[1] = {
+		{
+			.iov_base = io_u->buf,
+			.iov_len = io_u->buflen,
+		}
+	};
+	struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+	loff_t off = io_u->offset;
+	int ret, ret2;
+
+	while (iov[0].iov_len) {
+		if (poll(&pfd, 1, -1) < 0)
+			return -errno;
+
+		ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
+		if (ret < 0)
+			return -errno;
+
+		iov[0].iov_len -= ret;
+		iov[0].iov_base += ret;
+
+		/* off is not reset here, so a partial drain continues in place */
+		while (ret) {
+			ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+			if (ret2 < 0)
+				return -errno;
+
+			ret -= ret2;
+		}
+	}
+
+	return io_u->buflen;
+}
+
+/*
+ * Run one io_u through the read or write helper; returns io_u->error.
+ */
+static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct spliceio_data *sd = td->io_data;
+	int ret;
+
+	if (io_u->ddir == DDIR_READ)
+		ret = fio_splice_read(td, io_u);
+	else
+		ret = fio_splice_write(td, io_u);
+
+	if (ret < 0) {
+		/* the helpers return -errno on failure */
+		io_u->error = -ret;
+	} else if ((unsigned int) ret != io_u->buflen) {
+		io_u->resid = io_u->buflen - ret;
+		io_u->error = ENODATA;
+	}
+
+	if (!io_u->error)
+		sd->last_io_u = io_u;
+
+	return io_u->error;
+}
+
+static void fio_spliceio_cleanup(struct thread_data *td)
+{
+	struct spliceio_data *sd = td->io_data;
+
+	if (sd) {
+		close(sd->pipe[0]);
+		close(sd->pipe[1]);
+		free(sd);
+		td->io_data = NULL;
+	}
+}
+
+int fio_spliceio_init(struct thread_data *td)
+{
+	struct spliceio_data *sd = malloc(sizeof(*sd));
+
+	if (!sd) {
+		td_verror(td, ENOMEM);
+		return 1;
+	}
+
+	td->io_queue = fio_spliceio_queue;
+	td->io_getevents = fio_syncio_getevents;
+	td->io_event = fio_spliceio_event;
+	td->io_cancel = NULL;
+	td->io_cleanup = fio_spliceio_cleanup;
+	td->io_sync = fio_io_sync;
+
+	sd->last_io_u = NULL;
+	if (pipe(sd->pipe) < 0) {
+		td_verror(td, errno);
+		free(sd);
+		return 1;
+	}
+
+	td->io_data = sd;
+	return 0;
+}
+
+#else /* FIO_HAVE_SPLICE */
+
+int fio_spliceio_init(struct thread_data *td)
+{
+	return EINVAL;
+}
+
+#endif /* FIO_HAVE_SPLICE */
diff --git a/fio.c b/fio.c
index bc008343..9510dee4 100644
--- a/fio.c
+++ b/fio.c
@@ -1158,6 +1158,8 @@ static int init_io(struct thread_data *td)
 		return fio_posixaio_init(td);
 	else if (td->io_engine == FIO_SGIO)
 		return fio_sgio_init(td);
+	else if (td->io_engine == FIO_SPLICEIO)
+		return fio_spliceio_init(td);
 	else {
 		fprintf(stderr, "bad io_engine %d\n", td->io_engine);
 		return 1;
diff --git a/fio.h b/fio.h
index e997fbe5..fe7efbdc 100644
--- a/fio.h
+++ b/fio.h
@@ -278,6 +278,7 @@ enum {
 	FIO_LIBAIO = 1 << 2,
 	FIO_POSIXAIO = 1 << 3,
 	FIO_SGIO = 1 << 4,
+	FIO_SPLICEIO = 1 << 5 | FIO_SYNCIO,
 };
 
 #define td_read(td) ((td)->ddir == DDIR_READ)
diff --git a/os-linux.h b/os-linux.h
index ef2af7c7..865853ba 100644
--- a/os-linux.h
+++ b/os-linux.h
@@ -2,6 +2,8 @@
 #define FIO_OS_LINUX_H
 
 #include
+#include
+#include
 
 #define FIO_HAVE_LIBAIO
 #define FIO_HAVE_POSIXAIO
@@ -10,6 +12,7 @@
 #define FIO_HAVE_DISK_UTIL
 #define FIO_HAVE_SGIO
 #define FIO_HAVE_IOPRIO
+#define FIO_HAVE_SPLICE
 
 #define OS_MAP_ANON (MAP_ANONYMOUS)
 
@@ -31,6 +34,34 @@ static inline int ioprio_set(int which, int who, int ioprio)
 	return syscall(__NR_ioprio_set, which, who, ioprio);
 }
 
+static _syscall6(int, sys_splice, int, fdin, loff_t *, off_in, int, fdout, loff_t *, off_out, size_t, len, unsigned int, flags);
+static _syscall4(int, sys_vmsplice, int, fd, const struct iovec *, iov, unsigned long, nr_segs, unsigned int, flags);
+static _syscall4(int, sys_tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+
+static inline int splice(int fdin, loff_t *off_in, int fdout, loff_t *off_out,
+			 size_t len, unsigned int flags)
+{
+	return sys_splice(fdin, off_in, fdout, off_out, len, flags);
+}
+
+static inline int tee(int fdin, int fdout, size_t len, unsigned int flags)
+{
+	return sys_tee(fdin, fdout, len, flags);
+}
+
+static inline int vmsplice(int fd, const struct iovec *iov,
+			   unsigned long nr_segs, unsigned int flags)
+{
+	return sys_vmsplice(fd, iov, nr_segs, flags);
+}
+
+#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */
+#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
+ /* we may still block on the fd we splice */
+ /* from/to, of course */
+#define SPLICE_F_MORE (0x04) /* expect more data */
+#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */
+
 enum {
 	IOPRIO_WHO_PROCESS = 1,
 	IOPRIO_WHO_PGRP,
diff --git a/os.h b/os.h
index 3ee13689..093a722e 100644
--- a/os.h
+++ b/os.h
@@ -45,5 +45,6 @@ extern int fio_posixaio_init(struct thread_data *);
 extern int fio_syncio_init(struct thread_data *);
 extern int fio_mmapio_init(struct thread_data *);
 extern int fio_sgio_init(struct thread_data *);
+extern int fio_spliceio_init(struct thread_data *);
 
 #endif