This is likely still very buggy, though a simple test case works.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
all: depend $(PROGS) $(SCRIPTS)
$(MAKE) -C engines
-fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o
+fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o filesetup.o
$(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt -ldl
clean:
usb-storage or sata/libata driven) devices.
iodepth=x For async io, allow 'x' ios in flight
overwrite=x If 'x', layout a write file first.
+	nrfiles=x	Spread io load over 'x' files per job, if possible.
prio=x Run io at prio X, 0-7 is the kernel allowed range
prioclass=x Run io at prio class X
bs=x Use 'x' for thread blocksize. May include k/m postfix.
td->cpuload = 100;
td->read_iolog = td->write_iolog = 0;
- td->fd = -1;
+ td->nr_files = 0;
return 0;
}
struct io_event *aio_events;
};
-static int fio_libaio_sync(struct thread_data *td)
+static int fio_libaio_sync(struct thread_data *td, struct fio_file *f)
{
- return fsync(td->fd);
+ return fsync(f->fd);
}
static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u)
{
+ struct fio_file *f = io_u->file;
+
if (io_u->ddir == DDIR_READ)
- io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+ io_prep_pread(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
else
- io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+ io_prep_pwrite(&io_u->iocb, f->fd, io_u->buf, io_u->buflen, io_u->offset);
return 0;
}
static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
{
- unsigned long long real_off = io_u->offset - td->file_offset;
+ struct fio_file *f = io_u->file;
+ unsigned long long real_off = io_u->offset - f->file_offset;
struct mmapio_data *sd = td->io_ops->data;
if (io_u->ddir == DDIR_READ)
- memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
+ memcpy(io_u->buf, f->mmap + real_off, io_u->buflen);
else
- memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
+ memcpy(f->mmap + real_off, io_u->buf, io_u->buflen);
/*
* not really direct, but should drop the pages from the cache
*/
if (td->odirect) {
- if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
+ if (msync(f->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
io_u->error = errno;
- if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
+ if (madvise(f->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
io_u->error = errno;
}
return io_u->error;
}
-static int fio_mmapio_sync(struct thread_data *td)
+static int fio_mmapio_sync(struct thread_data *td, struct fio_file *f)
{
- return msync(td->mmap, td->file_size, MS_SYNC);
+ return msync(f->mmap, f->file_size, MS_SYNC);
}
static void fio_mmapio_cleanup(struct thread_data *td)
return sec + nsec;
}
-static int fio_posixaio_sync(struct thread_data *td)
+static int fio_posixaio_sync(struct thread_data *td, struct fio_file *f)
{
- return fsync(td->fd);
+ return fsync(f->fd);
}
static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
{
- int r = aio_cancel(td->fd, &io_u->aiocb);
+ struct fio_file *f = io_u->file;
+ int r = aio_cancel(f->fd, &io_u->aiocb);
if (r == 1 || r == AIO_CANCELED)
return 0;
static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
{
struct aiocb *aiocb = &io_u->aiocb;
+ struct fio_file *f = io_u->file;
- aiocb->aio_fildes = td->fd;
+ aiocb->aio_fildes = f->fd;
aiocb->aio_buf = io_u->buf;
aiocb->aio_nbytes = io_u->buflen;
aiocb->aio_offset = io_u->offset;
static int fio_sgio_getevents(struct thread_data *td, int min, int max,
struct timespec fio_unused *t)
{
+ struct fio_file *f = &td->files[0];
struct sgio_data *sd = td->io_ops->data;
- struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
+ struct pollfd pfd = { .fd = f->fd, .events = POLLIN };
void *buf = malloc(max * sizeof(struct sg_io_hdr));
int left = max, ret, events, i, r = 0, fl = 0;
* don't block for !events
*/
if (!min) {
- fl = fcntl(td->fd, F_GETFL);
- fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
+ fl = fcntl(f->fd, F_GETFL);
+ fcntl(f->fd, F_SETFL, fl | O_NONBLOCK);
}
while (left) {
break;
} while (1);
- ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
+ ret = read(f->fd, buf, left * sizeof(struct sg_io_hdr));
if (ret < 0) {
if (errno == EAGAIN)
break;
}
if (!min)
- fcntl(td->fd, F_SETFL, fl);
+ fcntl(f->fd, F_SETFL, fl);
free(buf);
return r;
}
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+static int fio_sgio_ioctl_doio(struct thread_data *td, struct fio_file *f,
+ struct io_u *io_u)
{
struct sgio_data *sd = td->io_ops->data;
struct sg_io_hdr *hdr = &io_u->hdr;
sd->events[0] = io_u;
- return ioctl(td->fd, SG_IO, hdr);
+ return ioctl(f->fd, SG_IO, hdr);
}
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
+static int fio_sgio_rw_doio(struct thread_data *td, struct fio_file *f,
+ struct io_u *io_u, int sync)
{
struct sg_io_hdr *hdr = &io_u->hdr;
int ret;
- ret = write(td->fd, hdr, sizeof(*hdr));
+ ret = write(f->fd, hdr, sizeof(*hdr));
if (ret < 0)
return errno;
if (sync) {
- ret = read(td->fd, hdr, sizeof(*hdr));
+ ret = read(f->fd, hdr, sizeof(*hdr));
if (ret < 0)
return errno;
}
static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
{
+ struct fio_file *f = io_u->file;
+
if (td->filetype == FIO_TYPE_BD)
- return fio_sgio_ioctl_doio(td, io_u);
+ return fio_sgio_ioctl_doio(td, f, io_u);
- return fio_sgio_rw_doio(td, io_u, sync);
+ return fio_sgio_rw_doio(td, f, io_u, sync);
}
-static int fio_sgio_sync(struct thread_data *td)
+static int fio_sgio_sync(struct thread_data *td, struct fio_file *f)
{
struct sgio_data *sd = td->io_ops->data;
struct sg_io_hdr *hdr;
static int fio_sgio_init(struct thread_data *td)
{
+ struct fio_file *f = &td->files[0];
struct sgio_data *sd;
unsigned int bs;
int ret;
td->io_ops->data = sd;
if (td->filetype == FIO_TYPE_BD) {
- if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+ if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
td_verror(td, errno);
return 1;
}
} else if (td->filetype == FIO_TYPE_CHAR) {
int version;
- if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
+ if (ioctl(f->fd, SG_GET_VERSION_NUM, &version) < 0) {
td_verror(td, errno);
return 1;
}
int pipe[2];
};
-static int fio_spliceio_sync(struct thread_data *td)
+static int fio_spliceio_sync(struct thread_data *td, struct fio_file *f)
{
- return fsync(td->fd);
+ return fsync(f->fd);
}
static int fio_spliceio_getevents(struct thread_data *td, int fio_unused min,
static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
{
struct spliceio_data *sd = td->io_ops->data;
+ struct fio_file *f = io_u->file;
int ret, ret2, buflen;
off_t offset;
void *p;
if (this_len > SPLICE_DEF_SIZE)
this_len = SPLICE_DEF_SIZE;
- ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
+ ret = splice(f->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
if (ret < 0) {
if (errno == ENODATA || errno == EAGAIN)
continue;
}
};
struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+ struct fio_file *f = io_u->file;
off_t off = io_u->offset;
int ret, ret2;
iov[0].iov_base += ret;
while (ret) {
- ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+ ret2 = splice(sd->pipe[0], NULL, f->fd, &off, ret, 0);
if (ret2 < 0)
return errno;
struct io_u *last_io_u;
};
-static int fio_syncio_sync(struct thread_data *td)
+static int fio_syncio_sync(struct thread_data *td, struct fio_file *f)
{
- return fsync(td->fd);
+ return fsync(f->fd);
}
static int fio_syncio_getevents(struct thread_data *td, int fio_unused min,
static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
{
- if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
+ struct fio_file *f = io_u->file;
+
+ if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) {
td_verror(td, errno);
return 1;
}
static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
{
struct syncio_data *sd = td->io_ops->data;
+ struct fio_file *f = io_u->file;
int ret;
if (io_u->ddir == DDIR_READ)
- ret = read(td->fd, io_u->buf, io_u->buflen);
+ ret = read(f->fd, io_u->buf, io_u->buflen);
else
- ret = write(td->fd, io_u->buf, io_u->buflen);
+ ret = write(f->fd, io_u->buf, io_u->buflen);
if ((unsigned int) ret != io_u->buflen) {
if (ret > 0) {
--- /dev/null
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "fio.h"
+#include "os.h"
+
+/*
+ * Lay out a single job file: create/truncate it to f->file_size and
+ * fill it with zeroes in td->max_bs sized writes. Returns 0 on success,
+ * 1 on error (reported through td_verror()). A termination request is
+ * not an error; the partial file is unlinked and 0 returned.
+ */
+static int create_file(struct thread_data *td, struct fio_file *f)
+{
+	unsigned long long left;
+	unsigned int bs;
+	char *b;
+	int r, err = 0;
+
+	f->fd = open(f->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (f->fd < 0) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (ftruncate(f->fd, f->file_size) == -1) {
+		td_verror(td, errno);
+		goto err;
+	}
+
+	b = malloc(td->max_bs);
+	if (!b) {
+		/* previously unchecked: a failed malloc crashed in memset */
+		td_verror(td, ENOMEM);
+		goto err;
+	}
+	memset(b, 0, td->max_bs);
+
+	left = f->file_size;
+	while (left && !td->terminate) {
+		bs = td->max_bs;
+		if (bs > left)
+			bs = left;
+
+		r = write(f->fd, b, bs);
+
+		if (r == (int) bs) {
+			left -= bs;
+			continue;
+		} else {
+			/*
+			 * Short or failed write: report it and bail. The old
+			 * code broke out here but still returned success.
+			 */
+			if (r < 0)
+				td_verror(td, errno);
+			else
+				td_verror(td, EIO);
+
+			err = 1;
+			break;
+		}
+	}
+
+	/* don't leave a half laid out file around on termination */
+	if (td->terminate)
+		unlink(f->file_name);
+	else if (td->create_fsync)
+		fsync(f->fd);
+
+	free(b);
+	close(f->fd);
+	f->fd = -1;
+	return err;
+err:
+	close(f->fd);
+	f->fd = -1;
+	return 1;
+}
+
+/*
+ * Lay out all files for a job. If overwrite is not requested, normal io
+ * is allowed to extend the files and nothing is written here. Returns 0
+ * on success, non-zero on failure.
+ */
+static int create_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err;
+
+	/*
+	 * unless specifically asked for overwrite, let normal io extend it
+	 */
+	if (!td->overwrite) {
+		td->io_size = td->total_file_size;
+		for_each_file(td, f, i)
+			f->file_size = td->total_file_size / td->nr_files;
+
+		return 0;
+	}
+
+	if (!td->total_file_size) {
+		log_err("Need size for create\n");
+		td_verror(td, EINVAL);
+		return 1;
+	}
+
+	temp_stall_ts = 1;
+	fprintf(f_out, "%s: Laying out IO file(s) (%LuMiB)\n",
+				td->name, td->total_file_size >> 20);
+
+	err = 0;
+	for_each_file(td, f, i) {
+		f->file_size = td->total_file_size / td->nr_files;
+		err = create_file(td, f);
+		/*
+		 * Bug fix: the break was unconditional, so only the first
+		 * file was ever laid out and td->io_size never accumulated.
+		 */
+		if (err)
+			break;
+
+		td->io_size += f->file_size;
+	}
+
+	temp_stall_ts = 0;
+	return err;
+}
+
+/*
+ * Determine the usable size of a regular file. The on-disk size is only
+ * queried when overwriting; otherwise the configured size stands and
+ * normal io is allowed to extend the file.
+ */
+static int file_size(struct thread_data *td, struct fio_file *f)
+{
+	struct stat st;
+
+	if (td->overwrite) {
+		if (fstat(f->fd, &st) == -1) {
+			td_verror(td, errno);
+			return 1;
+		}
+
+		f->real_file_size = st.st_size;
+
+		/*
+		 * Clamp the configured size to what is actually on disk
+		 * (0 means "use the whole file").
+		 */
+		if (!f->file_size || f->file_size > f->real_file_size)
+			f->file_size = f->real_file_size;
+	}
+
+	/* the usable size excludes the starting offset */
+	f->file_size -= f->file_offset;
+	return 0;
+}
+
+/*
+ * Determine the size of the block device backing this file, via the
+ * OS-specific blockdev_size() helper.
+ */
+static int bdev_size(struct thread_data *td, struct fio_file *f)
+{
+	unsigned long long bytes;
+	int r;
+
+	r = blockdev_size(f->fd, &bytes);
+	if (r) {
+		td_verror(td, r);
+		return 1;
+	}
+
+	f->real_file_size = bytes;
+
+	/*
+	 * no extend possibilities, so limit size to device size if too large
+	 */
+	if (!f->file_size || f->file_size > f->real_file_size)
+		f->file_size = f->real_file_size;
+
+	/* the usable size excludes the starting offset */
+	f->file_size -= f->file_offset;
+	return 0;
+}
+
+/*
+ * Fill in f->file_size / f->real_file_size according to file type,
+ * sanity check the starting offset, and add this file's contribution
+ * to the running total job io size.
+ */
+static int get_file_size(struct thread_data *td, struct fio_file *f)
+{
+	int ret = 0;
+
+	if (td->filetype == FIO_TYPE_FILE)
+		ret = file_size(td, f);
+	else if (td->filetype == FIO_TYPE_BD)
+		ret = bdev_size(td, f);
+	else
+		/* char device etc: real size is unknown */
+		f->real_file_size = -1;
+
+	if (ret)
+		return ret;
+
+	if (f->file_offset > f->real_file_size) {
+		log_err("%s: offset extends end (%Lu > %Lu)\n", td->name, f->file_offset, f->real_file_size);
+		return 1;
+	}
+
+	/* accumulate across all files of this job */
+	td->io_size += f->file_size;
+	return 0;
+}
+
+/*
+ * mmap one file for the mmap io engine, choosing protection bits from
+ * the io direction, then apply cache/access-pattern hints.
+ */
+static int __setup_file_mmap(struct thread_data *td, struct fio_file *f)
+{
+	int flags;
+
+	if (td_rw(td))
+		flags = PROT_READ | PROT_WRITE;
+	else if (td_write(td)) {
+		flags = PROT_WRITE;
+
+		/* verify needs to read back what was just written */
+		if (td->verify != VERIFY_NONE)
+			flags |= PROT_READ;
+	} else
+		flags = PROT_READ;
+
+	f->mmap = mmap(NULL, f->file_size, flags, MAP_SHARED, f->fd, f->file_offset);
+	if (f->mmap == MAP_FAILED) {
+		f->mmap = NULL;
+		td_verror(td, errno);
+		return 1;
+	}
+
+	/* drop already-cached pages so they don't skew the results */
+	if (td->invalidate_cache) {
+		if (madvise(f->mmap, f->file_size, MADV_DONTNEED) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	/* tell the vm what access pattern to expect on the mapping */
+	if (td->sequential) {
+		if (madvise(f->mmap, f->file_size, MADV_SEQUENTIAL) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	} else {
+		if (madvise(f->mmap, f->file_size, MADV_RANDOM) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run the mmap setup over every file in the job, stopping at the first
+ * failure.
+ */
+static int setup_files_mmap(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
+
+	for_each_file(td, f, i)
+		if (__setup_file_mmap(td, f))
+			return 1;
+
+	return 0;
+}
+
+/*
+ * fadvise setup for engines doing normal read/write io: optionally
+ * invalidate the cached range, then declare the expected access pattern.
+ */
+static int __setup_file_plain(struct thread_data *td, struct fio_file *f)
+{
+	if (td->invalidate_cache) {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_DONTNEED) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	if (td->sequential) {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_SEQUENTIAL) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	} else {
+		if (fadvise(f->fd, f->file_offset, f->file_size, POSIX_FADV_RANDOM) < 0) {
+			td_verror(td, errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Run the fadvise setup over every file in the job, stopping at the
+ * first failure.
+ */
+static int setup_files_plain(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
+
+	for_each_file(td, f, i)
+		if (__setup_file_plain(td, f))
+			return 1;
+
+	return 0;
+}
+
+/*
+ * Create a file if it doesn't exist or is too small, then open it with
+ * flags derived from the job settings and determine its size.
+ */
+static int setup_file(struct thread_data *td, struct fio_file *f)
+{
+	struct stat st;
+	int flags = 0;
+
+	if (stat(f->file_name, &st) == -1) {
+		if (errno != ENOENT) {
+			td_verror(td, errno);
+			return 1;
+		}
+		/* missing file: only lay it out if the job allows creation */
+		if (!td->create_file) {
+			td_verror(td, ENOENT);
+			return 1;
+		}
+		if (create_file(td, f))
+			return 1;
+	} else if (td->filetype == FIO_TYPE_FILE &&
+		   st.st_size < (off_t) f->file_size) {
+		/* existing file is smaller than requested, lay it out again */
+		if (create_file(td, f))
+			return 1;
+	}
+
+	if (td->odirect)
+		flags |= OS_O_DIRECT;
+
+	if (td_write(td) || td_rw(td)) {
+		if (td->filetype == FIO_TYPE_FILE) {
+			/* without overwrite, start from an empty file */
+			if (!td->overwrite)
+				flags |= O_TRUNC;
+
+			flags |= O_CREAT;
+		}
+		if (td->sync_io)
+			flags |= O_SYNC;
+
+		flags |= O_RDWR;
+
+		f->fd = open(f->file_name, flags, 0600);
+	} else {
+		/*
+		 * NOTE(review): char devices are opened read-write even for
+		 * read jobs — presumably because engines like sg submit
+		 * commands via write(); confirm against the sg engine.
+		 */
+		if (td->filetype == FIO_TYPE_CHAR)
+			flags |= O_RDWR;
+		else
+			flags |= O_RDONLY;
+
+		f->fd = open(f->file_name, flags);
+	}
+
+	if (f->fd == -1) {
+		td_verror(td, errno);
+		return 1;
+	}
+
+	if (get_file_size(td, f))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Set up all files for a job: lay them out if needed, open and size
+ * them, then apply the engine-appropriate mmap or fadvise hints.
+ * Returns 0 on success, non-zero on failure.
+ */
+int setup_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i, err;
+
+	/*
+	 * if ioengine defines a setup() method, it's responsible for
+	 * setting up everything in the td->files[] area.
+	 */
+	if (td->io_ops->setup)
+		return td->io_ops->setup(td);
+
+	if (create_files(td))
+		return 1;
+
+	err = 0;
+	for_each_file(td, f, i) {
+		err = setup_file(td, f);
+		if (err)
+			break;
+	}
+
+	/*
+	 * Bug fix: 'err' was computed but never checked, so a failed
+	 * open/size could fall through to the mmap/fadvise stage below.
+	 */
+	if (err)
+		return err;
+
+	if (td->io_size == 0) {
+		log_err("%s: no io blocks\n", td->name);
+		td_verror(td, EINVAL);
+		return 1;
+	}
+
+	if (!td->zone_size)
+		td->zone_size = td->io_size;
+
+	td->total_io_size = td->io_size * td->loops;
+
+	if (td->io_ops->flags & FIO_MMAPIO)
+		return setup_files_mmap(td);
+	else
+		return setup_files_plain(td);
+}
+
+/*
+ * Close any open descriptors and tear down any mappings for all of the
+ * job's files.
+ */
+void close_files(struct thread_data *td)
+{
+	struct fio_file *f;
+	int i;
+
+	for_each_file(td, f, i) {
+		if (f->fd != -1) {
+			close(f->fd);
+			f->fd = -1;
+		}
+		if (f->mmap) {
+			munmap(f->mmap, f->file_size);
+			f->mmap = NULL;
+		}
+	}
+}
static char run_str[MAX_JOBS + 1];
int shm_id = 0;
static struct timeval genesis;
-static int temp_stall_ts;
+int temp_stall_ts;
char *fio_inst_prefix = _INST_PREFIX;
static void print_thread_status(void);
* The ->file_map[] contains a map of blocks we have or have not done io
* to yet. Used to make sure we cover the entire range in a fair fashion.
*/
-static int random_map_free(struct thread_data *td, unsigned long long block)
+static int random_map_free(struct thread_data *td, struct fio_file *f,
+ unsigned long long block)
{
- unsigned int idx = RAND_MAP_IDX(td, block);
- unsigned int bit = RAND_MAP_BIT(td, block);
+ unsigned int idx = RAND_MAP_IDX(td, f, block);
+ unsigned int bit = RAND_MAP_BIT(td, f, block);
- return (td->file_map[idx] & (1UL << bit)) == 0;
+ return (f->file_map[idx] & (1UL << bit)) == 0;
}
/*
* Return the next free block in the map.
*/
-static int get_next_free_block(struct thread_data *td, unsigned long long *b)
+static int get_next_free_block(struct thread_data *td, struct fio_file *f,
+ unsigned long long *b)
{
int i;
*b = 0;
i = 0;
- while ((*b) * td->min_bs < td->io_size) {
- if (td->file_map[i] != -1UL) {
- *b += ffz(td->file_map[i]);
+ while ((*b) * td->min_bs < f->file_size) {
+ if (f->file_map[i] != -1UL) {
+ *b += ffz(f->file_map[i]);
return 0;
}
/*
* Mark a given offset as used in the map.
*/
-static void mark_random_map(struct thread_data *td, struct io_u *io_u)
+static void mark_random_map(struct thread_data *td, struct fio_file *f,
+ struct io_u *io_u)
{
unsigned long long block = io_u->offset / (unsigned long long) td->min_bs;
unsigned int blocks = 0;
while (blocks < (io_u->buflen / td->min_bs)) {
unsigned int idx, bit;
- if (!random_map_free(td, block))
+ if (!random_map_free(td, f, block))
break;
- idx = RAND_MAP_IDX(td, block);
- bit = RAND_MAP_BIT(td, block);
+ idx = RAND_MAP_IDX(td, f, block);
+ bit = RAND_MAP_BIT(td, f, block);
- assert(idx < td->num_maps);
+ assert(idx < f->num_maps);
- td->file_map[idx] |= (1UL << bit);
+ f->file_map[idx] |= (1UL << bit);
block++;
blocks++;
}
* until we find a free one. For sequential io, just return the end of
* the last io issued.
*/
-static int get_next_offset(struct thread_data *td, unsigned long long *offset)
+static int get_next_offset(struct thread_data *td, struct fio_file *f,
+ unsigned long long *offset)
{
unsigned long long b, rb;
long r;
do {
r = os_random_long(&td->random_state);
b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
- rb = b + (td->file_offset / td->min_bs);
+ rb = b + (f->file_offset / td->min_bs);
loops--;
- } while (!random_map_free(td, rb) && loops);
+ } while (!random_map_free(td, f, rb) && loops);
if (!loops) {
- if (get_next_free_block(td, &b))
+ if (get_next_free_block(td, f, &b))
return 1;
}
} else
- b = td->last_pos / td->min_bs;
+ b = f->last_pos / td->min_bs;
- *offset = (b * td->min_bs) + td->file_offset;
- if (*offset > td->real_file_size)
+ *offset = (b * td->min_bs) + f->file_offset;
+ if (*offset > f->file_size)
return 1;
return 0;
void put_io_u(struct thread_data *td, struct io_u *io_u)
{
+ io_u->file = NULL;
list_del(&io_u->list);
list_add(&io_u->list, &td->io_u_freelist);
td->cur_depth--;
}
-static int fill_io_u(struct thread_data *td, struct io_u *io_u)
+static int fill_io_u(struct thread_data *td, struct fio_file *f,
+ struct io_u *io_u)
{
/*
* If using an iolog, grab next piece if any available.
/*
* No log, let the seq/rand engine retrieve the next position.
*/
- if (!get_next_offset(td, &io_u->offset)) {
+ if (!get_next_offset(td, f, &io_u->offset)) {
io_u->buflen = get_next_buflen(td);
if (io_u->buflen) {
if (td->write_iolog)
write_iolog_put(td, io_u);
+ io_u->file = f;
return 0;
}
}
* Return an io_u to be processed. Gets a buflen and offset, sets direction,
* etc. The returned io_u is fully ready to be prepped and submitted.
*/
-static struct io_u *get_io_u(struct thread_data *td)
+static struct io_u *get_io_u(struct thread_data *td, struct fio_file *f)
{
struct io_u *io_u;
if (td->zone_bytes >= td->zone_size) {
td->zone_bytes = 0;
- td->last_pos += td->zone_skip;
+ f->last_pos += td->zone_skip;
}
- if (fill_io_u(td, io_u)) {
+ if (fill_io_u(td, f, io_u)) {
put_io_u(td, io_u);
return NULL;
}
- if (io_u->buflen + io_u->offset > td->real_file_size)
- io_u->buflen = td->real_file_size - io_u->offset;
+ if (io_u->buflen + io_u->offset > f->file_size)
+ io_u->buflen = f->file_size - io_u->offset;
if (!io_u->buflen) {
put_io_u(td, io_u);
}
if (!td->read_iolog && !td->sequential)
- mark_random_map(td, io_u);
+ mark_random_map(td, f, io_u);
- td->last_pos += io_u->buflen;
+ f->last_pos += io_u->buflen;
if (td->verify != VERIFY_NONE)
populate_io_u(td, io_u);
return 1;
}
-static int sync_td(struct thread_data *td)
+static struct fio_file *get_next_file(struct thread_data *td)
+{
+ struct fio_file *f = &td->files[td->next_file];
+
+ td->next_file++;
+ if (td->next_file >= td->nr_files)
+ td->next_file = 0;
+
+ return f;
+}
+
+static int td_io_sync(struct thread_data *td, struct fio_file *f)
{
if (td->io_ops->sync)
- return td->io_ops->sync(td);
+ return td->io_ops->sync(td, f);
return 0;
}
struct timeval t;
struct io_u *io_u, *v_io_u = NULL;
struct io_completion_data icd;
+ struct fio_file *f;
int ret;
td_set_runstate(td, TD_VERIFYING);
break;
}
+ f = get_next_file(td);
+ if (!f)
+ break;
+
+ io_u->file = f;
+
if (td_io_prep(td, io_u)) {
put_io_u(td, io_u);
break;
struct io_completion_data icd;
struct timeval s, e;
unsigned long usec;
+ struct fio_file *f;
+ int i;
td_set_runstate(td, TD_RUNNING);
if (td->terminate)
break;
- io_u = get_io_u(td);
+ f = get_next_file(td);
+ if (!f)
+ break;
+
+ io_u = get_io_u(td, f);
if (!io_u)
break;
if (should_fsync(td) && td->fsync_blocks &&
(td->io_blocks[DDIR_WRITE] % td->fsync_blocks) == 0)
- sync_td(td);
+ td_io_sync(td, f);
}
if (td->cur_depth)
if (should_fsync(td) && td->end_fsync) {
td_set_runstate(td, TD_FSYNCING);
- sync_td(td);
+ for_each_file(td, f, i)
+ td_io_sync(td, f);
}
}
return 0;
}
-static int create_file(struct thread_data *td, unsigned long long size)
-{
- unsigned long long left;
- unsigned int bs;
- char *b;
- int r;
-
- /*
- * unless specifically asked for overwrite, let normal io extend it
- */
- if (!td->overwrite) {
- td->real_file_size = size;
- return 0;
- }
-
- if (!size) {
- log_err("Need size for create\n");
- td_verror(td, EINVAL);
- return 1;
- }
-
- temp_stall_ts = 1;
- fprintf(f_out, "%s: Laying out IO file (%LuMiB)\n",td->name,size >> 20);
-
- td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (td->fd < 0) {
- td_verror(td, errno);
- goto done_noclose;
- }
-
- if (ftruncate(td->fd, td->file_size) == -1) {
- td_verror(td, errno);
- goto done;
- }
-
- td->io_size = td->file_size;
- b = malloc(td->max_bs);
- memset(b, 0, td->max_bs);
-
- left = size;
- while (left && !td->terminate) {
- bs = td->max_bs;
- if (bs > left)
- bs = left;
-
- r = write(td->fd, b, bs);
-
- if (r == (int) bs) {
- left -= bs;
- continue;
- } else {
- if (r < 0)
- td_verror(td, errno);
- else
- td_verror(td, EIO);
-
- break;
- }
- }
-
- if (td->terminate)
- unlink(td->file_name);
- else if (td->create_fsync)
- fsync(td->fd);
-
- free(b);
-done:
- close(td->fd);
- td->fd = -1;
-done_noclose:
- temp_stall_ts = 0;
- return 0;
-}
-
-static int file_size(struct thread_data *td)
-{
- struct stat st;
-
- if (td->overwrite) {
- if (fstat(td->fd, &st) == -1) {
- td_verror(td, errno);
- return 1;
- }
-
- td->real_file_size = st.st_size;
-
- if (!td->file_size || td->file_size > td->real_file_size)
- td->file_size = td->real_file_size;
- }
-
- td->file_size -= td->file_offset;
- return 0;
-}
-
-static int bdev_size(struct thread_data *td)
-{
- unsigned long long bytes;
- int r;
-
- r = blockdev_size(td->fd, &bytes);
- if (r) {
- td_verror(td, r);
- return 1;
- }
-
- td->real_file_size = bytes;
-
- /*
- * no extend possibilities, so limit size to device size if too large
- */
- if (!td->file_size || td->file_size > td->real_file_size)
- td->file_size = td->real_file_size;
-
- td->file_size -= td->file_offset;
- return 0;
-}
-
-static int get_file_size(struct thread_data *td)
-{
- int ret = 0;
-
- if (td->filetype == FIO_TYPE_FILE)
- ret = file_size(td);
- else if (td->filetype == FIO_TYPE_BD)
- ret = bdev_size(td);
- else
- td->real_file_size = -1;
-
- if (ret)
- return ret;
-
- if (td->file_offset > td->real_file_size) {
- log_err("%s: offset extends end (%Lu > %Lu)\n", td->name, td->file_offset, td->real_file_size);
- return 1;
- }
-
- td->io_size = td->file_size;
- if (td->io_size == 0) {
- log_err("%s: no io blocks\n", td->name);
- td_verror(td, EINVAL);
- return 1;
- }
-
- if (!td->zone_size)
- td->zone_size = td->io_size;
-
- td->total_io_size = td->io_size * td->loops;
- return 0;
-}
-
-static int setup_file_mmap(struct thread_data *td)
-{
- int flags;
-
- if (td_rw(td))
- flags = PROT_READ | PROT_WRITE;
- else if (td_write(td)) {
- flags = PROT_WRITE;
-
- if (td->verify != VERIFY_NONE)
- flags |= PROT_READ;
- } else
- flags = PROT_READ;
-
- td->mmap = mmap(NULL, td->file_size, flags, MAP_SHARED, td->fd, td->file_offset);
- if (td->mmap == MAP_FAILED) {
- td->mmap = NULL;
- td_verror(td, errno);
- return 1;
- }
-
- if (td->invalidate_cache) {
- if (madvise(td->mmap, td->file_size, MADV_DONTNEED) < 0) {
- td_verror(td, errno);
- return 1;
- }
- }
-
- if (td->sequential) {
- if (madvise(td->mmap, td->file_size, MADV_SEQUENTIAL) < 0) {
- td_verror(td, errno);
- return 1;
- }
- } else {
- if (madvise(td->mmap, td->file_size, MADV_RANDOM) < 0) {
- td_verror(td, errno);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int setup_file_plain(struct thread_data *td)
-{
- if (td->invalidate_cache) {
- if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
- td_verror(td, errno);
- return 1;
- }
- }
-
- if (td->sequential) {
- if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_SEQUENTIAL) < 0) {
- td_verror(td, errno);
- return 1;
- }
- } else {
- if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_RANDOM) < 0) {
- td_verror(td, errno);
- return 1;
- }
- }
-
- return 0;
-}
-
-static int setup_file(struct thread_data *td)
-{
- struct stat st;
- int flags = 0;
-
- if (td->io_ops->setup)
- return td->io_ops->setup(td);
-
- if (stat(td->file_name, &st) == -1) {
- if (errno != ENOENT) {
- td_verror(td, errno);
- return 1;
- }
- if (!td->create_file) {
- td_verror(td, ENOENT);
- return 1;
- }
- if (create_file(td, td->file_size))
- return 1;
- } else if (td->filetype == FIO_TYPE_FILE &&
- st.st_size < (off_t) td->file_size) {
- if (create_file(td, td->file_size))
- return 1;
- }
-
- if (td->odirect)
- flags |= OS_O_DIRECT;
-
- if (td_write(td) || td_rw(td)) {
- if (td->filetype == FIO_TYPE_FILE) {
- if (!td->overwrite)
- flags |= O_TRUNC;
-
- flags |= O_CREAT;
- }
- if (td->sync_io)
- flags |= O_SYNC;
-
- flags |= O_RDWR;
-
- td->fd = open(td->file_name, flags, 0600);
- } else {
- if (td->filetype == FIO_TYPE_CHAR)
- flags |= O_RDWR;
- else
- flags |= O_RDONLY;
-
- td->fd = open(td->file_name, flags);
- }
-
- if (td->fd == -1) {
- td_verror(td, errno);
- return 1;
- }
-
- if (get_file_size(td))
- return 1;
-
- if (td->io_ops->flags & FIO_MMAPIO)
- return setup_file_mmap(td);
- else
- return setup_file_plain(td);
-}
-
static int switch_ioscheduler(struct thread_data *td)
{
char tmp[256], tmp2[128];
static void clear_io_state(struct thread_data *td)
{
- if (td->io_ops->flags & FIO_SYNCIO)
- lseek(td->fd, SEEK_SET, 0);
+ struct fio_file *f;
+ int i;
- td->last_pos = 0;
td->stat_io_bytes[0] = td->stat_io_bytes[1] = 0;
td->this_io_bytes[0] = td->this_io_bytes[1] = 0;
td->zone_bytes = 0;
- if (td->file_map)
- memset(td->file_map, 0, td->num_maps * sizeof(long));
+ for_each_file(td, f, i) {
+ f->last_pos = 0;
+ if (td->io_ops->flags & FIO_SYNCIO)
+ lseek(f->fd, SEEK_SET, 0);
+
+ if (f->file_map)
+ memset(f->file_map, 0, f->num_maps * sizeof(long));
+ }
}
/*
fio_sem_up(&startup_sem);
fio_sem_down(&td->mutex);
- if (!td->create_serialize && setup_file(td))
+ if (!td->create_serialize && setup_files(td))
goto err;
gettimeofday(&td->epoch, NULL);
terminate_threads(td->groupid);
err:
- if (td->fd != -1) {
- close(td->fd);
- td->fd = -1;
- }
- if (td->mmap)
- munmap(td->mmap, td->file_size);
+ close_files(td);
close_ioengine(td);
cleanup_io_u(td);
td_set_runstate(td, TD_EXITED);
* we don't want X number of threads getting their
* client data interspersed on disk
*/
- if (setup_file(td)) {
+ if (setup_files(td)) {
td_set_runstate(td, TD_REAPED);
todo--;
}
struct io_piece {
struct list_head list;
+ struct fio_file *file;
unsigned long long offset;
unsigned int len;
int ddir;
unsigned char seen;
unsigned char ddir;
+ struct fio_file *file;
+
struct list_head list;
};
FIO_MMAPIO = 1 << 2,
};
+struct fio_file {
+ /*
+ * A file may not be a file descriptor, let the io engine decide
+ */
+ union {
+ unsigned long file_data;
+ int fd;
+ };
+ char *file_name;
+ void *mmap;
+ unsigned long long file_size;
+ unsigned long long real_file_size;
+ unsigned long long file_offset;
+ unsigned long long last_pos;
+
+ unsigned long *file_map;
+ unsigned int num_maps;
+};
+
/*
* This describes a single thread/process executing a fio job.
*/
struct thread_data {
char name[32];
- char *file_name;
char *directory;
char verror[80];
pthread_t thread;
int thread_number;
int groupid;
enum fio_filetype filetype;
+ struct fio_file *files;
+ unsigned int nr_files;
+ unsigned int next_file;
int error;
- int fd;
- void *mmap;
pid_t pid;
char *orig_buffer;
size_t orig_buffer_size;
unsigned int overwrite;
unsigned int bw_avg_time;
unsigned int loops;
- unsigned long long file_size;
- unsigned long long real_file_size;
- unsigned long long file_offset;
unsigned long long zone_size;
unsigned long long zone_skip;
enum fio_memtype mem_type;
unsigned long runtime[2]; /* msec */
unsigned long long io_size;
+ unsigned long long total_file_size;
+ unsigned long long start_offset;
unsigned long long total_io_size;
unsigned long long io_blocks[2];
unsigned long long io_bytes[2];
unsigned long long zone_bytes;
unsigned long long this_io_bytes[2];
- unsigned long long last_pos;
volatile int mutex;
/*
* State for random io, a bitmap of blocks done vs not done
*/
os_random_state_t random_state;
- unsigned long *file_map;
- unsigned int num_maps;
/*
* CPU "io" cycle burner
extern FILE *f_out;
extern FILE *f_err;
extern char *fio_inst_prefix;
+extern int temp_stall_ts;
extern struct thread_data *threads;
#define td_rw(td) ((td)->iomix != 0)
#define BLOCKS_PER_MAP (8 * sizeof(long))
-#define TO_MAP_BLOCK(td, b) ((b) - ((td)->file_offset / (td)->min_bs))
-#define RAND_MAP_IDX(td, b) (TO_MAP_BLOCK(td, b) / BLOCKS_PER_MAP)
-#define RAND_MAP_BIT(td, b) (TO_MAP_BLOCK(td, b) & (BLOCKS_PER_MAP - 1))
+#define TO_MAP_BLOCK(td, f, b) ((b) - ((f)->file_offset / (td)->min_bs))
+#define RAND_MAP_IDX(td, f, b) (TO_MAP_BLOCK(td, f, b) / BLOCKS_PER_MAP)
+#define RAND_MAP_BIT(td, f, b) (TO_MAP_BLOCK(td, f, b) & (BLOCKS_PER_MAP - 1))
#define MAX_JOBS (1024)
extern int parse_options(int, char **);
extern int init_random_state(struct thread_data *);
+/*
+ * File setup/shutdown
+ */
+extern void close_files(struct thread_data *);
+extern int setup_files(struct thread_data *);
+
/*
* This is a pretty crappy semaphore implementation, but with the use that fio
* has (just signalling start/go conditions), it doesn't have to be better.
struct io_u *(*event)(struct thread_data *, int);
int (*cancel)(struct thread_data *, struct io_u *);
void (*cleanup)(struct thread_data *);
- int (*sync)(struct thread_data *);
+ int (*sync)(struct thread_data *, struct fio_file *);
void *data;
void *dlhandle;
};
-#define FIO_IOOPS_VERSION 1
+#define FIO_IOOPS_VERSION 2
extern struct ioengine_ops *load_ioengine(struct thread_data *, char *);
extern void close_ioengine(struct thread_data *);
*/
#define fio_unused __attribute((__unused__))
+#define for_each_file(td, f, i) \
+ for ((i) = 0, (f) = &(td)->files[(i)]; (i) < (td)->nr_files; (i)++, (f) = &(td)->files[(i)])
+
#endif
#define DEF_RWMIX_CYCLE (500)
#define DEF_RWMIX_READ (50)
#define DEF_NICE (0)
+#define DEF_NR_FILES (1)
static int def_timeout = DEF_TIMEOUT;
*td = *parent;
td->name[0] = '\0';
- td->fd = -1;
td->thread_number = thread_number;
return td;
}
char *ddir_str[] = { "read", "write", "randread", "randwrite",
"rw", NULL, "randrw" };
struct stat sb;
- int numjobs, ddir;
+ int numjobs, ddir, i;
+ struct fio_file *f;
#ifndef FIO_HAVE_LIBAIO
if (td->io_engine == FIO_LIBAIO) {
}
/*
- * only really works for sequential io for now
+ * only really works for sequential io for now, and with 1 file
*/
- if (td->zone_size && !td->sequential)
+ if (td->zone_size && !td->sequential && td->nr_files == 1)
td->zone_size = 0;
/*
if (td->filetype == FIO_TYPE_FILE) {
char tmp[PATH_MAX];
+ int len = 0;
+ int i;
if (td->directory && td->directory[0] != '\0')
- sprintf(tmp, "%s/%s.%d", td->directory, jobname, td->thread_number);
- else
- sprintf(tmp, "%s.%d", jobname, td->thread_number);
- td->file_name = strdup(tmp);
- } else
- td->file_name = strdup(jobname);
+ sprintf(tmp, "%s/", td->directory);
+ td->files = malloc(sizeof(struct fio_file) * td->nr_files);
+
+ for_each_file(td, f, i) {
+ memset(f, 0, sizeof(*f));
+ f->fd = -1;
+
+ sprintf(tmp + len, "%s.%d.%d", jobname, td->thread_number, i);
+ f->file_name = strdup(tmp);
+ }
+ } else {
+ td->nr_files = 1;
+ td->files = malloc(sizeof(struct fio_file));
+ f = &td->files[0];
+
+ memset(f, 0, sizeof(*f));
+ f->fd = -1;
+ f->file_name = strdup(jobname);
+ }
+
+ for_each_file(td, f, i) {
+ f->file_size = td->total_file_size / td->nr_files;
+ f->file_offset = td->start_offset;
+ }
+
fio_sem_init(&td->mutex, 0);
td->clat_stat[0].min_val = td->clat_stat[1].min_val = ULONG_MAX;
int init_random_state(struct thread_data *td)
{
unsigned long seeds[4];
- int fd, num_maps, blocks;
+ int fd, num_maps, blocks, i;
fd = open("/dev/urandom", O_RDONLY);
if (fd == -1) {
if (td->rand_repeatable)
seeds[3] = DEF_RANDSEED;
- blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
- num_maps = blocks / BLOCKS_PER_MAP;
- td->file_map = malloc(num_maps * sizeof(long));
- td->num_maps = num_maps;
- memset(td->file_map, 0, num_maps * sizeof(long));
+ for (i = 0; i < td->nr_files; i++) {
+ struct fio_file *f = &td->files[i];
+
+ blocks = (f->file_size + td->min_bs - 1) / td->min_bs;
+ num_maps = blocks / BLOCKS_PER_MAP;
+ f->file_map = malloc(num_maps * sizeof(long));
+ f->num_maps = num_maps;
+ memset(f->file_map, 0, num_maps * sizeof(long));
+ }
os_random_seed(seeds[3], &td->random_state);
return 0;
fgetpos(f, &off);
continue;
}
+ if (!check_int(p, "nrfiles", &td->nr_files)) {
+ fgetpos(f, &off);
+ continue;
+ }
if (!check_range_bytes(p, "bsrange", &ul1, &ul2)) {
if (ul1 > ul2) {
td->max_bs = ul1;
fgetpos(f, &off);
continue;
}
- if (!check_str_bytes(p, "size", &td->file_size)) {
+ if (!check_str_bytes(p, "size", &td->total_file_size)) {
fgetpos(f, &off);
continue;
}
- if (!check_str_bytes(p, "offset", &td->file_offset)) {
+ if (!check_str_bytes(p, "offset", &td->start_offset)) {
fgetpos(f, &off);
continue;
}
def_thread.rwmixread = DEF_RWMIX_READ;
def_thread.nice = DEF_NICE;
def_thread.rand_repeatable = DEF_RAND_REPEAT;
+ def_thread.nr_files = DEF_NR_FILES;
#ifdef FIO_HAVE_DISK_UTIL
def_thread.do_disk_util = 1;
#endif
io_u->offset = ipo->offset;
io_u->buflen = ipo->len;
io_u->ddir = ipo->ddir;
+ io_u->file = ipo->file;
free(ipo);
return 0;
}
struct list_head *entry;
INIT_LIST_HEAD(&ipo->list);
+ ipo->file = io_u->file;
ipo->offset = io_u->offset;
ipo->len = io_u->buflen;
void init_disk_util(struct thread_data *td)
{
+ struct fio_file *f;
struct stat st;
char foo[256], tmp[256];
dev_t dev;
if (!td->do_disk_util)
return;
- if (!stat(td->file_name, &st)) {
+ /*
+ * Just use the same file, they are on the same device.
+ */
+ f = &td->files[0];
+ if (!stat(f->file_name, &st)) {
if (S_ISBLK(st.st_mode))
dev = st.st_rdev;
else
/*
* must be a file, open "." in that path
*/
- strcpy(foo, td->file_name);
+ strcpy(foo, f->file_name);
p = dirname(foo);
if (stat(p, &st)) {
perror("disk util stat");