Revamp file locking
authorJens Axboe <jens.axboe@oracle.com>
Tue, 4 Mar 2008 09:18:56 +0000 (10:18 +0100)
committerJens Axboe <jens.axboe@oracle.com>
Tue, 4 Mar 2008 09:18:56 +0000 (10:18 +0100)
Get rid of the semaphore implementation, no need to carry both.
Add different locking modes (exclusive and readwrite) to enable
a wider range of testing. Also combine lockfile and lockfile_batch,
the latter is now a postfix option to the former.

So to enable readers-excluding-writers locking mode with a lock batch
count of 4, you would write:

lockfile=readwrite:4

instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
12 files changed:
HOWTO
Makefile
filesetup.c
fio.h
io_u.c
ioengines.c
mutex.c
mutex.h
options.c
sem.c [deleted file]
sem.h [deleted file]
smalloc.c

diff --git a/HOWTO b/HOWTO
index 3e0a31b..3d92293 100644 (file)
--- a/HOWTO
+++ b/HOWTO
@@ -219,13 +219,24 @@ filename=str      Fio normally makes up a filename based on the job name,
 opendir=str    Tell fio to recursively add any file it can find in this
                directory and down the file system tree.
 
-lockfile=bool  If set, fio will lock a file internally before doing IO to it.
-               This makes it safe to share file descriptors across fio
-               jobs that run at the same time.
-
-lockfile_batch=int     Acquiring a semaphore can be quite expensive, so
-               allow a process to complete this number of IOs before releasing
-               the semaphore again. Defaults to 1.
+lockfile=str   Fio defaults to not doing any locking files before it does
+               IO to them. If a file or file descriptor is shared, fio
+               can serialize IO to that file to make the end result
+               consistent. This is usual for emulating real workloads that
+               share files. The lock modes are:
+
+                       none            No locking. The default.
+                       exclusive       Only one thread/process may do IO,
+                                       excluding all others.
+                       readwrite       Read-write locking on the file. Many
+                                       readers may access the file at the
+                                       same time, but writes get exclusive
+                                       access.
+
+               The option may be post-fixed with a lock batch number. If
+               set, then each thread/process may do that amount of IOs to
+               the file before giving up the lock. Since lock acqusition is
+               expensive, batching the lock/unlocks will speed up IO.
 
 readwrite=str
 rw=str         Type of io pattern. Accepted values are:
index 1b77b25..e0ab0b8 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CFLAGS  = -Wwrite-strings -Wall -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_
 PROGS  = fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \
-       eta.o verify.o memory.o io_u.o parse.o mutex.o sem.o options.o \
+       eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
        rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o
 
 OBJS += crc/crc7.o
index 4e2a36c..4d2017d 100644 (file)
@@ -226,9 +226,33 @@ int generic_close_file(struct thread_data fio_unused *td, struct fio_file *f)
        return ret;
 }
 
-int generic_open_file(struct thread_data *td, struct fio_file *f)
+static int file_lookup_open(struct fio_file *f, int flags)
 {
        struct fio_file *__f;
+       int from_hash;
+
+       __f = lookup_file_hash(f->file_name);
+       if (__f) {
+               /*
+                * racy, need the __f->lock locked
+                */
+               f->lock = __f->lock;
+               f->lock_owner = __f->lock_owner;
+               f->lock_batch = __f->lock_batch;
+               f->lock_ddir = __f->lock_ddir;
+               f->fd = dup(__f->fd);
+               f->references++;
+               from_hash = 1;
+       } else {
+               f->fd = open(f->file_name, flags, 0600);
+               from_hash = 0;
+       }
+
+       return from_hash;
+}
+
+int generic_open_file(struct thread_data *td, struct fio_file *f)
+{
        int is_std = 0;
        int flags = 0;
        int from_hash = 0;
@@ -267,16 +291,8 @@ open_again:
 
                if (is_std)
                        f->fd = dup(STDOUT_FILENO);
-               else {
-                       __f = lookup_file_hash(f->file_name);
-                       if (__f) {
-                               f->sem = __f->sem;
-                               f->fd = dup(__f->fd);
-                               f->references++;
-                               from_hash = 1;
-                       } else
-                               f->fd = open(f->file_name, flags, 0600);
-               }
+               else
+                       from_hash = file_lookup_open(f, flags);
        } else {
                if (f->filetype == FIO_TYPE_CHAR && !read_only)
                        flags |= O_RDWR;
@@ -285,16 +301,8 @@ open_again:
 
                if (is_std)
                        f->fd = dup(STDIN_FILENO);
-               else {
-                       __f = lookup_file_hash(f->file_name);
-                       if (__f) {
-                               f->sem = __f->sem;
-                               f->fd = dup(__f->fd);
-                               f->references++;
-                               from_hash = 1;
-                       } else
-                               f->fd = open(f->file_name, flags);
-               }
+               else
+                       from_hash = file_lookup_open(f, flags);
        }
 
        if (f->fd == -1) {
@@ -641,8 +649,19 @@ int add_file(struct thread_data *td, const char *fname)
 
        get_file_type(f);
 
-       if (td->o.lockfile)
-               f->sem = fio_sem_init(1);
+       switch (td->o.file_lock_mode) {
+       case FILE_LOCK_NONE:
+               break;
+       case FILE_LOCK_READWRITE:
+               f->lock = fio_mutex_rw_init();
+               break;
+       case FILE_LOCK_EXCLUSIVE:
+               f->lock = fio_mutex_init(1);
+               break;
+       default:
+               log_err("fio: unknown lock mode: %d\n", td->o.file_lock_mode);
+               assert(0);
+       }
 
        td->files_index++;
        if (f->filetype == FIO_TYPE_FILE)
@@ -682,33 +701,64 @@ int put_file(struct thread_data *td, struct fio_file *f)
        return ret;
 }
 
-void lock_file(struct thread_data *td, struct fio_file *f)
+void lock_file(struct thread_data *td, struct fio_file *f, enum fio_ddir ddir)
 {
-       if (f && f->sem) {
-               if (f->sem_owner == td && f->sem_batch--)
-                       return;
+       if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
+               return;
 
-               fio_sem_down(f->sem);
-               f->sem_owner = td;
-               f->sem_batch = td->o.lockfile_batch;
-       }
+       if (f->lock_owner == td && f->lock_batch--)
+               return;
+
+       if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
+               if (ddir == DDIR_READ)
+                       fio_mutex_down_read(f->lock);
+               else
+                       fio_mutex_down_write(f->lock);
+       } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE)
+               fio_mutex_down(f->lock);
+
+       f->lock_owner = td;
+       f->lock_batch = td->o.lockfile_batch;
+       f->lock_ddir = ddir;
 }
 
-void unlock_file(struct fio_file *f)
+void unlock_file(struct thread_data *td, struct fio_file *f)
 {
-       if (f && f->sem) {
-               int sem_val;
+       if (!f->lock || td->o.file_lock_mode == FILE_LOCK_NONE)
+               return;
+       if (f->lock_batch)
+               return;
+
+       if (td->o.file_lock_mode == FILE_LOCK_READWRITE) {
+               const int is_read = f->lock_ddir == DDIR_READ;
+               int val = fio_mutex_getval(f->lock);
 
-               if (f->sem_batch)
-                       return;
+               if ((is_read && val == 1) || (!is_read && val == -1))
+                       f->lock_owner = NULL;
 
-               sem_getvalue(&f->sem->sem, &sem_val);
-               if (!sem_val)
-                       f->sem_owner = NULL;
-               fio_sem_up(f->sem);
+               if (is_read)
+                       fio_mutex_up_read(f->lock);
+               else
+                       fio_mutex_up_write(f->lock);
+       } else if (td->o.file_lock_mode == FILE_LOCK_EXCLUSIVE) {
+               int val = fio_mutex_getval(f->lock);
+
+               if (val == 0)
+                       f->lock_owner = NULL;
+
+               fio_mutex_up(f->lock);
        }
 }
 
+void unlock_file_all(struct thread_data *td, struct fio_file *f)
+{
+       if (f->lock_owner != td)
+               return;
+
+       f->lock_batch = 0;
+       unlock_file(td, f);
+}
+
 static int recurse_dir(struct thread_data *td, const char *dirname)
 {
        struct dirent *dir;
diff --git a/fio.h b/fio.h
index ffd3d7d..b22009b 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -22,7 +22,6 @@
 #include "arch/arch.h"
 #include "os/os.h"
 #include "mutex.h"
-#include "sem.h"
 #include "log.h"
 #include "debug.h"
 
@@ -47,6 +46,12 @@ enum td_ddir {
        TD_DDIR_RANDRW          = TD_DDIR_RW | TD_DDIR_RAND,
 };
 
+enum file_lock_mode {
+       FILE_LOCK_NONE,
+       FILE_LOCK_EXCLUSIVE,
+       FILE_LOCK_READWRITE,
+};
+
 /*
  * Use for maintaining statistics
  */
@@ -313,9 +318,10 @@ struct fio_file {
        /*
         * if io is protected by a semaphore, this is set
         */
-       struct fio_sem *sem;
-       void *sem_owner;
-       unsigned int sem_batch;
+       struct fio_mutex *lock;
+       void *lock_owner;
+       unsigned int lock_batch;
+       enum fio_ddir lock_ddir;
 
        /*
         * block map for random io
@@ -415,7 +421,7 @@ struct thread_options {
 
        unsigned int nr_files;
        unsigned int open_files;
-       unsigned int lockfile;
+       enum file_lock_mode file_lock_mode;
        unsigned int lockfile_batch;
 
        unsigned int odirect;
@@ -820,8 +826,9 @@ extern int __must_check generic_close_file(struct thread_data *, struct fio_file
 extern int add_file(struct thread_data *, const char *);
 extern void get_file(struct fio_file *);
 extern int __must_check put_file(struct thread_data *, struct fio_file *);
-extern void lock_file(struct thread_data *, struct fio_file *);
-extern void unlock_file(struct fio_file *);
+extern void lock_file(struct thread_data *, struct fio_file *, enum fio_ddir);
+extern void unlock_file(struct thread_data *, struct fio_file *);
+extern void unlock_file_all(struct thread_data *, struct fio_file *);
 extern int add_dir_files(struct thread_data *, const char *);
 extern int init_random_map(struct thread_data *);
 extern void dup_files(struct thread_data *, struct thread_data *);
diff --git a/io_u.c b/io_u.c
index 04d7dcb..5a3157a 100644 (file)
--- a/io_u.c
+++ b/io_u.c
@@ -665,7 +665,6 @@ set_file:
                 * td_io_close() does a put_file() as well, so no need to
                 * do that here.
                 */
-               unlock_file(io_u->file);
                io_u->file = NULL;
                td_io_close_file(td, f);
                f->flags |= FIO_FILE_DONE;
index bd2eb4a..87db11c 100644 (file)
@@ -168,14 +168,14 @@ int td_io_prep(struct thread_data *td, struct io_u *io_u)
        dprint_io_u(io_u, "prep");
        fio_ro_check(td, io_u);
 
-       lock_file(td, io_u->file);
+       lock_file(td, io_u->file, io_u->ddir);
 
        if (td->io_ops->prep) {
                int ret = td->io_ops->prep(td, io_u);
 
                dprint(FD_IO, "->prep(%p)=%d\n", io_u, ret);
                if (ret)
-                       unlock_file(io_u->file);
+                       unlock_file(td, io_u->file);
                return ret;
        }
 
@@ -232,7 +232,7 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u)
 
        ret = td->io_ops->queue(td, io_u);
 
-       unlock_file(io_u->file);
+       unlock_file(td, io_u->file);
 
        if (ret != FIO_Q_BUSY)
                io_u_mark_depth(td, io_u);
@@ -359,10 +359,7 @@ int td_io_close_file(struct thread_data *td, struct fio_file *f)
         */
        f->flags |= FIO_FILE_CLOSING;
 
-       if (f->sem_owner == td && f->sem_batch) {
-               f->sem_batch = 0;
-               unlock_file(f);
-       }
+       unlock_file_all(td, f);
 
        return put_file(td, f);
 }
diff --git a/mutex.c b/mutex.c
index bcc37ae..e6fb3f0 100644 (file)
--- a/mutex.c
+++ b/mutex.c
@@ -7,6 +7,7 @@
 #include <sys/mman.h>
 
 #include "mutex.h"
+#include "arch/arch.h"
 
 void fio_mutex_remove(struct fio_mutex *mutex)
 {
@@ -76,8 +77,13 @@ err:
 void fio_mutex_down(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
-       while (mutex->value == 0)
+
+       while (!mutex->value) {
+               mutex->waiters++;
                pthread_cond_wait(&mutex->cond, &mutex->lock);
+               mutex->waiters--;
+       }
+
        mutex->value--;
        pthread_mutex_unlock(&mutex->lock);
 }
@@ -85,7 +91,8 @@ void fio_mutex_down(struct fio_mutex *mutex)
 void fio_mutex_up(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
-       if (!mutex->value)
+       read_barrier();
+       if (!mutex->value && mutex->waiters)
                pthread_cond_signal(&mutex->cond);
        mutex->value++;
        pthread_mutex_unlock(&mutex->lock);
@@ -94,8 +101,13 @@ void fio_mutex_up(struct fio_mutex *mutex)
 void fio_mutex_down_write(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
-       while (mutex->value != 0)
+
+       while (mutex->value != 0) {
+               mutex->waiters++;
                pthread_cond_wait(&mutex->cond, &mutex->lock);
+               mutex->waiters--;
+       }
+
        mutex->value--;
        pthread_mutex_unlock(&mutex->lock);
 }
@@ -103,8 +115,13 @@ void fio_mutex_down_write(struct fio_mutex *mutex)
 void fio_mutex_down_read(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
-       while (mutex->value < 0)
+
+       while (mutex->value < 0) {
+               mutex->waiters++;
                pthread_cond_wait(&mutex->cond, &mutex->lock);
+               mutex->waiters--;
+       }
+
        mutex->value++;
        pthread_mutex_unlock(&mutex->lock);
 }
@@ -113,7 +130,8 @@ void fio_mutex_up_read(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
        mutex->value--;
-       if (mutex->value >= 0)
+       read_barrier();
+       if (mutex->value >= 0 && mutex->waiters)
                pthread_cond_signal(&mutex->cond);
        pthread_mutex_unlock(&mutex->lock);
 }
@@ -122,7 +140,8 @@ void fio_mutex_up_write(struct fio_mutex *mutex)
 {
        pthread_mutex_lock(&mutex->lock);
        mutex->value++;
-       if (mutex->value >= 0)
+       read_barrier();
+       if (mutex->value >= 0 && mutex->waiters)
                pthread_cond_signal(&mutex->cond);
        pthread_mutex_unlock(&mutex->lock);
 }
diff --git a/mutex.h b/mutex.h
index 40cfc1e..7be0ab1 100644 (file)
--- a/mutex.h
+++ b/mutex.h
@@ -7,6 +7,7 @@ struct fio_mutex {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int value;
+       int waiters;
 
        int mutex_fd;
 };
@@ -25,4 +26,9 @@ static inline struct fio_mutex *fio_mutex_rw_init(void)
        return fio_mutex_init(0);
 }
 
+static inline int fio_mutex_getval(struct fio_mutex *mutex)
+{
+       return mutex->value;
+}
+
 #endif
index 27b4fb6..bb77683 100644 (file)
--- a/options.c
+++ b/options.c
@@ -351,6 +351,18 @@ static int str_verify_pattern_cb(void *data, unsigned int *off)
        return 0;
 }
 
+static int str_lockfile_cb(void *data, const char *str)
+{
+       struct thread_data *td = data;
+       char *nr = get_opt_postfix(str);
+
+       td->o.lockfile_batch = 1;
+       if (nr)
+               td->o.lockfile_batch = atoi(nr);
+
+       return 0;
+}
+
 #define __stringify_1(x)       #x
 #define __stringify(x)         __stringify_1(x)
 
@@ -386,19 +398,27 @@ static struct fio_option options[] = {
        },
        {
                .name   = "lockfile",
-               .type   = FIO_OPT_BOOL,
-               .off1   = td_var_offset(lockfile),
+               .type   = FIO_OPT_STR,
+               .cb     = str_lockfile_cb,
+               .off1   = td_var_offset(file_lock_mode),
                .help   = "Lock file when doing IO to it",
                .parent = "filename",
-               .def    = "0",
-       },
-       {
-               .name   = "lockfile_batch",
-               .type   = FIO_OPT_INT,
-               .off1   = td_var_offset(lockfile_batch),
-               .help   = "Number of IOs to allow per file lock",
-               .parent = "lockfile",
-               .def    = "1",
+               .def    = "none",
+               .posval = {
+                         { .ival = "none",
+                           .oval = FILE_LOCK_NONE,
+                           .help = "No file locking",
+                         },
+                         { .ival = "exclusive",
+                           .oval = FILE_LOCK_EXCLUSIVE,
+                           .help = "Exclusive file lock",
+                         },
+                         {
+                           .ival = "readwrite",
+                           .oval = FILE_LOCK_READWRITE,
+                           .help = "Read vs write lock",
+                         },
+               },
        },
        {
                .name   = "opendir",
diff --git a/sem.c b/sem.c
deleted file mode 100644 (file)
index 8b25ad3..0000000
--- a/sem.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <stdio.h>
-
-#include "sem.h"
-#include "smalloc.h"
-
-void fio_sem_remove(struct fio_sem *sem)
-{
-       sfree(sem);
-}
-
-struct fio_sem *fio_sem_init(int value)
-{
-       struct fio_sem *sem;
-
-       sem = smalloc(sizeof(*sem));
-       if (!sem)
-               return NULL;
-
-       sem->sem_val = value;
-
-       if (!sem_init(&sem->sem, 1, value))
-               return sem;
-
-       perror("sem_init");
-       sfree(sem);
-       return NULL;
-}
-
-void fio_sem_down(struct fio_sem *sem)
-{
-       sem_wait(&sem->sem);
-}
-
-void fio_sem_up(struct fio_sem *sem)
-{
-       sem_post(&sem->sem);
-}
diff --git a/sem.h b/sem.h
deleted file mode 100644 (file)
index a7b4664..0000000
--- a/sem.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef FIO_SEM_H
-#define FIO_SEM_H
-
-#include <semaphore.h>
-
-struct fio_sem {
-       sem_t sem;
-       int sem_val;
-};
-
-extern struct fio_sem *fio_sem_init(int);
-extern void fio_sem_remove(struct fio_sem *);
-extern void fio_sem_down(struct fio_sem *);
-extern void fio_sem_up(struct fio_sem *);
-
-#endif
index 9a7c25b..85da781 100644 (file)
--- a/smalloc.c
+++ b/smalloc.c
@@ -268,11 +268,12 @@ out_close:
 
 void sinit(void)
 {
-       int ret = add_pool(&mp[0]);
+       int ret;
 
 #ifdef MP_SAFE
        lock = fio_mutex_rw_init();
 #endif
+       ret = add_pool(&mp[0]);
        assert(!ret);
 }