Add a real semaphore implemtation
authorJens Axboe <jens.axboe@oracle.com>
Thu, 8 Mar 2007 19:25:46 +0000 (20:25 +0100)
committerJens Axboe <jens.axboe@oracle.com>
Thu, 8 Mar 2007 19:25:46 +0000 (20:25 +0100)
I've seen races where job N+1 got started before N, this breaks
for dependent jobs. So give up and implement a real semaphore
in mmap'ed shared storage.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Makefile
fio.c
fio.h
init.c
mutex.c [new file with mode: 0644]
mutex.h [new file with mode: 0644]

index 6af29a3..63289e6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CFLAGS  = -Wwrite-strings -Wall -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_
 PROGS  = fio
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
-       filesetup.o eta.o verify.o memory.o io_u.o parse.o
+       filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o
 
 OBJS += engines/cpu.o
 OBJS += engines/libaio.o
diff --git a/fio.c b/fio.c
index 72cd02b..3cf2a9b 100644 (file)
--- a/fio.c
+++ b/fio.c
@@ -46,7 +46,7 @@ int thread_number = 0;
 int shm_id = 0;
 int temp_stall_ts;
 
-static volatile int startup_sem;
+static struct fio_sem *startup_sem;
 static volatile int fio_abort;
 static int exit_value;
 
@@ -731,8 +731,8 @@ static void *thread_main(void *data)
                goto err;
 
        td_set_runstate(td, TD_INITIALIZED);
-       fio_sem_up(&startup_sem);
-       fio_sem_down(&td->mutex);
+       fio_sem_up(startup_sem);
+       fio_sem_down(td->mutex);
 
        if (!td->create_serialize && setup_files(td))
                goto err;
@@ -930,6 +930,8 @@ reaped:
                                perror("pthread_join");
                }
 
+               fio_sem_remove(td->mutex);
+
                (*nr_running)--;
                (*m_rate) -= td->ratemin;
                (*t_rate) -= td->rate;
@@ -1030,7 +1032,6 @@ static void run_threads(void)
                         */
                        td_set_runstate(td, TD_CREATED);
                        map[this_jobs++] = td;
-                       fio_sem_init(&startup_sem, 1);
                        nr_started++;
 
                        if (td->use_thread) {
@@ -1039,14 +1040,13 @@ static void run_threads(void)
                                        nr_started--;
                                }
                        } else {
-                               if (fork())
-                                       fio_sem_down(&startup_sem);
-                               else {
+                               if (!fork()) {
                                        int ret = fork_main(shm_id, i);
 
                                        exit(ret);
                                }
                        }
+                       fio_sem_down(startup_sem);
                }
 
                /*
@@ -1101,7 +1101,7 @@ static void run_threads(void)
                        m_rate += td->ratemin;
                        t_rate += td->rate;
                        todo--;
-                       fio_sem_up(&td->mutex);
+                       fio_sem_up(td->mutex);
                }
 
                reap_threads(&nr_running, &t_rate, &m_rate);
@@ -1151,6 +1151,8 @@ int main(int argc, char *argv[])
                setup_log(&agg_io_log[DDIR_WRITE]);
        }
 
+       startup_sem = fio_sem_init(0);
+
        set_genesis_time();
 
        disk_util_timer_arm();
@@ -1165,5 +1167,6 @@ int main(int argc, char *argv[])
                }
        }
 
+       fio_sem_remove(startup_sem);
        return exit_value;
 }
diff --git a/fio.h b/fio.h
index 7f314de..12cf3c9 100644 (file)
--- a/fio.h
+++ b/fio.h
@@ -17,6 +17,7 @@
 #include "crc32.h"
 #include "arch.h"
 #include "os.h"
+#include "mutex.h"
 
 #ifdef FIO_HAVE_SYSLET
 #include "syslet.h"
@@ -431,7 +432,7 @@ struct thread_data {
        unsigned long long io_bytes[2];
        unsigned long long this_io_bytes[2];
        unsigned long long zone_bytes;
-       volatile int mutex;
+       struct fio_sem *mutex;
 
        /*
         * State for random io, a bitmap of blocks done vs not done
@@ -703,30 +704,6 @@ extern int __must_check td_io_commit(struct thread_data *);
 extern int __must_check td_io_open_file(struct thread_data *, struct fio_file *);
 extern void td_io_close_file(struct thread_data *, struct fio_file *);
 
-/*
- * This is a pretty crappy semaphore implementation, but with the use that fio
- * has (just signalling start/go conditions), it doesn't have to be better.
- * Naturally this would not work for any type of contended semaphore or
- * for real locking.
- */
-static inline void fio_sem_init(volatile int *sem, int val)
-{
-       *sem = val;
-}
-
-static inline void fio_sem_down(volatile int *sem)
-{
-       while (*sem == 0)
-               usleep(10000);
-
-       (*sem)--;
-}
-
-static inline void fio_sem_up(volatile int *sem)
-{
-       (*sem)++;
-}
-
 /*
  * If logging output to a file, stderr should go to both stderr and f_err
  */
diff --git a/init.c b/init.c
index a6d8bae..c75bed2 100644 (file)
--- a/init.c
+++ b/init.c
@@ -857,7 +857,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
                f->file_offset = td->start_offset;
        }
                
-       fio_sem_init(&td->mutex, 0);
+       td->mutex = fio_sem_init(0);
 
        td->ts.clat_stat[0].min_val = td->ts.clat_stat[1].min_val = ULONG_MAX;
        td->ts.slat_stat[0].min_val = td->ts.slat_stat[1].min_val = ULONG_MAX;
diff --git a/mutex.c b/mutex.c
new file mode 100644 (file)
index 0000000..bb417c2
--- /dev/null
+++ b/mutex.c
@@ -0,0 +1,85 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "mutex.h"
+
+void fio_sem_remove(struct fio_sem *sem)
+{
+       unlink(sem->sem_name);
+       munmap(sem, sizeof(*sem));
+}
+
+struct fio_sem *fio_sem_init(int value)
+{
+       pthread_mutexattr_t attr;
+       struct fio_sem *sem;
+       char sem_name[32];
+       int fd;
+
+       sprintf(sem_name, "/tmp/.fio_lock.XXXXXX");
+       fd = mkstemp(sem_name);
+       if (fd < 0) {
+               perror("open sem");
+               return NULL;
+       }
+
+       if (ftruncate(fd, sizeof(struct fio_sem)) < 0) {
+               perror("ftruncate sem");
+               return NULL;
+       }
+
+       sem = mmap(NULL, sizeof(struct fio_sem), PROT_READ | PROT_WRITE,
+                       MAP_SHARED, fd, 0);
+       if (sem == MAP_FAILED) {
+               perror("mmap sem");
+               close(fd);
+               unlink(sem_name);
+               return NULL;
+       }
+
+       close(fd);
+       sem->value = value;
+       strcpy(sem->sem_name, sem_name);
+
+       if (pthread_mutexattr_init(&attr)) {
+               perror("pthread_mutexattr_init");
+               goto err;
+       }
+       if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
+               perror("pthread_mutexattr_setpshared");
+               goto err;
+       }
+       if (pthread_mutex_init(&sem->lock, &attr)) {
+               perror("pthread_mutex_init");
+               goto err;
+       }
+
+       return sem;
+err:
+       munmap(sem, sizeof(*sem));
+       unlink(sem_name);
+       return NULL;
+}
+
+void fio_sem_down(struct fio_sem *sem)
+{
+       pthread_mutex_lock(&sem->lock);
+       while (sem->value == 0)
+               pthread_cond_wait(&sem->cond, &sem->lock);
+       sem->value--;
+       pthread_mutex_unlock(&sem->lock);
+}
+
+void fio_sem_up(struct fio_sem *sem)
+{
+       pthread_mutex_lock(&sem->lock);
+       if (!sem->value)
+               pthread_cond_signal(&sem->cond);
+       sem->value++;
+       pthread_mutex_unlock(&sem->lock);
+}
diff --git a/mutex.h b/mutex.h
new file mode 100644 (file)
index 0000000..483e5f4
--- /dev/null
+++ b/mutex.h
@@ -0,0 +1,19 @@
+#ifndef FIO_MUTEX_H
+#define FIO_MUTEX_H
+
+#include <pthread.h>
+
+struct fio_sem {
+       pthread_mutex_t lock;
+       pthread_cond_t cond;
+       unsigned int value;
+
+       char sem_name[32];
+};
+
+extern struct fio_sem *fio_sem_init(int);
+extern void fio_sem_remove(struct fio_sem *);
+extern inline void fio_sem_down(struct fio_sem *);
+extern inline void fio_sem_up(struct fio_sem *sem);
+
+#endif