From d01612f3ae2515eb035d0c4ce954d8cb167a0a61 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Nov 2013 15:52:58 -0700 Subject: [PATCH] Add support for O_ATOMIC O_ATOMIC makes writes atomic, meaning that they are fully stable on media (in the event of a power cut) when acknowledged by the device and OS. This only truly works on Linux with the pending patches to add O_ATOMIC. Updated by Jens to: - Add man page and HOWTO description of the option - Make O_ATOMIC imply O_DIRECT, so that it actually works if you don't set O_DIRECT manually. - Add the option to the conversion list so it works for client/server. - Error handling so that if atomic=1 is set and the OS does not support it, error out instead of just pretending it works. Signed-off-by: Jens Axboe --- HOWTO | 5 +++++ backend.c | 5 +++-- cconv.c | 2 ++ filesetup.c | 7 +++++++ fio.1 | 5 +++++ init.c | 6 ++++++ memory.c | 4 ++-- options.c | 10 ++++++++++ os/os-linux.h | 6 ++++++ os/os.h | 6 ++++++ server.h | 2 +- thread_options.h | 2 ++ 12 files changed, 55 insertions(+), 5 deletions(-) diff --git a/HOWTO b/HOWTO index eb2ed254..250bc582 100644 --- a/HOWTO +++ b/HOWTO @@ -695,6 +695,11 @@ direct=bool If value is true, use non-buffered io. This is usually O_DIRECT. Note that ZFS on Solaris doesn't support direct io. On Windows the synchronous ioengines don't support direct io. +atomic=bool If value is true, attempt to use atomic direct IO. Atomic + writes are guaranteed to be stable once acknowledged by + the operating system. Only Linux supports O_ATOMIC right + now. + buffered=bool If value is true, use buffered io. This is the opposite of the 'direct' option. Defaults to true. diff --git a/backend.c b/backend.c index 00a23db6..2ec478c1 100644 --- a/backend.c +++ b/backend.c @@ -926,7 +926,8 @@ static int init_io_u(struct thread_data *td) * overflow later. this adjustment may be too much if we get * lucky and the allocator gives us an aligned address. */ - if (td->o.odirect || td->o.mem_align || (td->io_ops->flags & FIO_RAWIO)) + if (td->o.odirect || td->o.mem_align || td->o.oatomic || + (td->io_ops->flags & FIO_RAWIO)) td->orig_buffer_size += page_mask + td->o.mem_align; if (td->o.mem_type == MEM_SHMHUGE || td->o.mem_type == MEM_MMAPHUGE) { @@ -944,7 +945,7 @@ static int init_io_u(struct thread_data *td) if (data_xfer && allocate_io_mem(td)) return 1; - if (td->o.odirect || td->o.mem_align || + if (td->o.odirect || td->o.mem_align || td->o.oatomic || (td->io_ops->flags & FIO_RAWIO)) p = PAGE_ALIGN(td->orig_buffer) + td->o.mem_align; else diff --git a/cconv.c b/cconv.c index 21e3a51b..82383b2d 100644 --- a/cconv.c +++ b/cconv.c @@ -89,6 +89,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->open_files = le32_to_cpu(top->open_files); o->file_lock_mode = le32_to_cpu(top->file_lock_mode); o->odirect = le32_to_cpu(top->odirect); + o->oatomic = le32_to_cpu(top->oatomic); o->invalidate_cache = le32_to_cpu(top->invalidate_cache); o->create_serialize = le32_to_cpu(top->create_serialize); o->create_fsync = le32_to_cpu(top->create_fsync); @@ -252,6 +253,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->open_files = cpu_to_le32(o->open_files); top->file_lock_mode = cpu_to_le32(o->file_lock_mode); top->odirect = cpu_to_le32(o->odirect); + top->oatomic = cpu_to_le32(o->oatomic); top->invalidate_cache = cpu_to_le32(o->invalidate_cache); top->create_serialize = cpu_to_le32(o->create_serialize); top->create_fsync = cpu_to_le32(o->create_fsync); diff --git a/filesetup.c b/filesetup.c index 4265e383..c9b060b7 100644 --- a/filesetup.c +++ b/filesetup.c @@ -519,6 +519,13 @@ int generic_open_file(struct thread_data *td, struct fio_file *f) goto skip_flags; if (td->o.odirect) flags |= OS_O_DIRECT; + if (td->o.oatomic) { + if (!FIO_O_ATOMIC) { + td_verror(td, EINVAL, "OS does not support atomic IO"); + return 1; + } + flags |= OS_O_DIRECT | FIO_O_ATOMIC; + } if (td->o.sync_io) flags |= O_SYNC; if (td->o.create_on_open) diff --git a/fio.1 b/fio.1 index e910e010..15a1ac5d 100644 --- a/fio.1 +++ b/fio.1 @@ -569,6 +569,11 @@ Low watermark indicating when to start filling the queue again. Default: .BI direct \fR=\fPbool If true, use non-buffered I/O (usually O_DIRECT). Default: false. .TP +.BI atomic \fR=\fPbool +If value is true, attempt to use atomic direct IO. Atomic writes are guaranteed +to be stable once acknowledged by the operating system. Only Linux supports +O_ATOMIC right now. +.TP .BI buffered \fR=\fPbool If true, use buffered I/O. This is the opposite of the \fBdirect\fR parameter. Default: true. diff --git a/init.c b/init.c index b45b039e..1841ffc0 100644 --- a/init.c +++ b/init.c @@ -629,6 +629,12 @@ static int fixup_options(struct thread_data *td) ret = 1; } + /* + * O_ATOMIC implies O_DIRECT + */ + if (td->o.oatomic) + td->o.odirect = 1; + return ret; } diff --git a/memory.c b/memory.c index e06cab29..b208320c 100644 --- a/memory.c +++ b/memory.c @@ -209,7 +209,7 @@ int allocate_io_mem(struct thread_data *td) total_mem = td->orig_buffer_size; - if (td->o.odirect || td->o.mem_align || + if (td->o.odirect || td->o.mem_align || td->o.oatomic || (td->io_ops->flags & FIO_MEMALIGN)) { total_mem += page_mask; if (td->o.mem_align && td->o.mem_align > page_size) @@ -240,7 +240,7 @@ void free_io_mem(struct thread_data *td) unsigned int total_mem; total_mem = td->orig_buffer_size; - if (td->o.odirect) + if (td->o.odirect || td->o.oatomic) total_mem += page_mask; if (td->o.mem_type == MEM_MALLOC) diff --git a/options.c b/options.c index f26ff776..4b4c251b 100644 --- a/options.c +++ b/options.c @@ -1892,6 +1892,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_IO_TYPE, }, + { + .name = "atomic", + .lname = "Atomic I/O", + .type = FIO_OPT_BOOL, + .off1 = td_var_offset(oatomic), + .help = "Use Atomic IO with O_DIRECT (implies O_DIRECT)", + .def = "0", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_IO_TYPE, + }, { .name = "buffered", .lname = "Buffered I/O", diff --git a/os/os-linux.h b/os/os-linux.h index 869a25d8..5d1d62db 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -196,6 +196,12 @@ static inline int fio_lookup_raw(dev_t dev, int *majdev, int *mindev) #define FIO_O_NOATIME 0 #endif +#ifdef O_ATOMIC +#define OS_O_ATOMIC O_ATOMIC +#else +#define OS_O_ATOMIC 040000000 +#endif + #ifdef MADV_REMOVE #define FIO_MADV_FREE MADV_REMOVE #endif diff --git a/os/os.h b/os/os.h index 4416ae48..715f2260 100644 --- a/os/os.h +++ b/os/os.h @@ -90,6 +90,12 @@ typedef unsigned long os_cpu_mask_t; #define OS_O_DIRECT O_DIRECT #endif +#ifdef OS_O_ATOMIC +#define FIO_O_ATOMIC OS_O_ATOMIC +#else +#define FIO_O_ATOMIC 0 +#endif + #ifndef FIO_HAVE_HUGETLB #define SHM_HUGETLB 0 #define MAP_HUGETLB 0 diff --git a/server.h b/server.h index 5d9b6cca..405370e3 100644 --- a/server.h +++ b/server.h @@ -38,7 +38,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 26, + FIO_SERVER_VER = 27, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, diff --git a/thread_options.h b/thread_options.h index 484b16a5..44cbf91a 100644 --- a/thread_options.h +++ b/thread_options.h @@ -72,6 +72,7 @@ struct thread_options { enum file_lock_mode file_lock_mode; unsigned int odirect; + unsigned int oatomic; unsigned int invalidate_cache; unsigned int create_serialize; unsigned int create_fsync; @@ -286,6 +287,7 @@ struct thread_options_pack { uint32_t file_lock_mode; uint32_t odirect; + uint32_t oatomic; uint32_t invalidate_cache; uint32_t create_serialize; uint32_t create_fsync; -- 2.25.1