rate-submit.c

   1 /*
   2  * Rated submission helpers
   3  *
   4  * Copyright (C) 2015 Jens Axboe <axboe@kernel.dk>
   5  *
   6  */
   7 #include <assert.h>
   8 #include <errno.h>
   9 #include <pthread.h>
  10
  11 #include "fio.h"
  12 #include "ioengines.h"
  13 #include "lib/getrusage.h"
  14 #include "rate-submit.h"
  15
  16 static void check_overlap(struct io_u *io_u)
  17 {
  18         int res;
  19
  20         /*
  21          * Allow only one thread to check for overlap at a time to prevent two
  22          * threads from thinking the coast is clear and then submitting IOs
  23          * that overlap with each other.
  24          *
  25          * If an overlap is found, release the lock and re-acquire it before
  26          * checking again to give other threads a chance to make progress.
  27          *
  28          * If no overlap is found, release the lock when the io_u's
  29          * IO_U_F_FLIGHT flag is set so that this io_u can be checked by other
  30          * threads as they assess overlap.
  31          */
  32         res = pthread_mutex_lock(&overlap_check);
  33         if (fio_unlikely(res != 0)) {
  34                 log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno));
  35                 abort();
  36         }
  37
  38 retry:
  39         for_each_td(td) {
  40                 if (td->runstate <= TD_SETTING_UP ||
  41                     td->runstate >= TD_FINISHING ||
  42                     !td->o.serialize_overlap ||
  43                     td->o.io_submit_mode != IO_MODE_OFFLOAD)
  44                         continue;
  45
  46                 if (!in_flight_overlap(&td->io_u_all, io_u))
  47                         continue;
  48
  49                 res = pthread_mutex_unlock(&overlap_check);
  50                 if (fio_unlikely(res != 0)) {
  51                         log_err("failed to unlock overlap check mutex, err: %i:%s", errno, strerror(errno));
  52                         abort();
  53                 }
  54                 res = pthread_mutex_lock(&overlap_check);
  55                 if (fio_unlikely(res != 0)) {
  56                         log_err("failed to lock overlap check mutex, err: %i:%s", errno, strerror(errno));
  57                         abort();
  58                 }
  59                 goto retry;
  60         } end_for_each();
  61 }
  62
  63 static int io_workqueue_fn(struct submit_worker *sw,
  64                            struct workqueue_work *work)
  65 {
  66         struct io_u *io_u = container_of(work, struct io_u, work);
  67         const enum fio_ddir ddir = io_u->ddir;
  68         struct thread_data *td = sw->priv;
  69         int ret, error;
  70
  71         if (td->o.serialize_overlap)
  72                 check_overlap(io_u);
  73
  74         dprint(FD_RATE, "io_u %p queued by %u\n", io_u, gettid());
  75
  76         io_u_set(td, io_u, IO_U_F_NO_FILE_PUT);
  77
  78         td->cur_depth++;
  79
  80         do {
  81                 ret = td_io_queue(td, io_u);
  82                 if (ret != FIO_Q_BUSY)
  83                         break;
  84                 ret = io_u_queued_complete(td, 1);
  85                 if (ret > 0)
  86                         td->cur_depth -= ret;
  87                 else if (ret < 0)
  88                         break;
  89                 io_u_clear(td, io_u, IO_U_F_FLIGHT);
  90         } while (1);
  91
  92         dprint(FD_RATE, "io_u %p ret %d by %u\n", io_u, ret, gettid());
  93
  94         error = io_queue_event(td, io_u, &ret, ddir, NULL, 0, NULL);
  95
  96         if (ret == FIO_Q_COMPLETED)
  97                 td->cur_depth--;
  98         else if (ret == FIO_Q_QUEUED) {
  99                 unsigned int min_evts;
 100
 101                 if (td->o.iodepth == 1)
 102                         min_evts = 1;
 103                 else
 104                         min_evts = 0;
 105
 106                 ret = io_u_queued_complete(td, min_evts);
 107                 if (ret > 0)
 108                         td->cur_depth -= ret;
 109         }
 110
 111         if (error || td->error) {
 112                 pthread_mutex_lock(&td->io_u_lock);
 113                 pthread_cond_signal(&td->parent->free_cond);
 114                 pthread_mutex_unlock(&td->io_u_lock);
 115         }
 116
 117         return 0;
 118 }
 119
 120 static bool io_workqueue_pre_sleep_flush_fn(struct submit_worker *sw)
 121 {
 122         struct thread_data *td = sw->priv;
 123
 124         if (td->error)
 125                 return false;
 126         if (td->io_u_queued || td->cur_depth || td->io_u_in_flight)
 127                 return true;
 128
 129         return false;
 130 }
 131
 132 static void io_workqueue_pre_sleep_fn(struct submit_worker *sw)
 133 {
 134         struct thread_data *td = sw->priv;
 135         int ret;
 136
 137         ret = io_u_quiesce(td);
 138         if (ret > 0)
 139                 td->cur_depth -= ret;
 140 }
 141
 142 static int io_workqueue_alloc_fn(struct submit_worker *sw)
 143 {
 144         struct thread_data *td;
 145
 146         td = calloc(1, sizeof(*td));
 147         sw->priv = td;
 148         return 0;
 149 }
 150
 151 static void io_workqueue_free_fn(struct submit_worker *sw)
 152 {
 153         free(sw->priv);
 154         sw->priv = NULL;
 155 }
 156
 157 static int io_workqueue_init_worker_fn(struct submit_worker *sw)
 158 {
 159         struct thread_data *parent = sw->wq->td;
 160         struct thread_data *td = sw->priv;
 161
 162         memcpy(&td->o, &parent->o, sizeof(td->o));
 163         memcpy(&td->ts, &parent->ts, sizeof(td->ts));
 164         td->o.uid = td->o.gid = -1U;
 165         dup_files(td, parent);
 166         td->eo = parent->eo;
 167         fio_options_mem_dupe(td);
 168         td->iolog_f = parent->iolog_f;
 169
 170         if (ioengine_load(td))
 171                 goto err;
 172
 173         td->pid = gettid();
 174
 175         INIT_FLIST_HEAD(&td->io_log_list);
 176         INIT_FLIST_HEAD(&td->io_hist_list);
 177         INIT_FLIST_HEAD(&td->verify_list);
 178         INIT_FLIST_HEAD(&td->trim_list);
 179         td->io_hist_tree = RB_ROOT;
 180
 181         td->o.iodepth = 1;
 182         if (td_io_init(td))
 183                 goto err_io_init;
 184
 185         if (td->io_ops->post_init && td->io_ops->post_init(td))
 186                 goto err_io_init;
 187
 188         set_epoch_time(td, td->o.log_alternate_epoch_clock_id, td->o.job_start_clock_id);
 189         fio_getrusage(&td->ru_start);
 190         clear_io_state(td, 1);
 191
 192         td_set_runstate(td, TD_RUNNING);
 193         td->flags |= TD_F_CHILD | TD_F_NEED_LOCK;
 194         td->parent = parent;
 195         return 0;
 196
 197 err_io_init:
 198         close_ioengine(td);
 199 err:
 200         return 1;
 201
 202 }
 203
 204 static void io_workqueue_exit_worker_fn(struct submit_worker *sw,
 205                                         unsigned int *sum_cnt)
 206 {
 207         struct thread_data *td = sw->priv;
 208
 209         (*sum_cnt)++;
 210
 211         /*
 212          * io_workqueue_update_acct_fn() doesn't support per prio stats, and
 213          * even if it did, offload can't be used with all async IO engines.
 214          * If group reporting is set in the parent td, the group result
 215          * generated by __show_run_stats() can still contain multiple prios
 216          * from different offloaded jobs.
 217          */
 218         sw->wq->td->ts.disable_prio_stat = 1;
 219         sum_thread_stats(&sw->wq->td->ts, &td->ts);
 220
 221         fio_options_free(td);
 222         close_and_free_files(td);
 223         if (td->io_ops)
 224                 close_ioengine(td);
 225         td_set_runstate(td, TD_EXITED);
 226 }
 227
 228 #ifdef CONFIG_SFAA
 229 static void sum_val(uint64_t *dst, uint64_t *src)
 230 {
 231         if (*src) {
 232                 __sync_fetch_and_add(dst, *src);
 233                 *src = 0;
 234         }
 235 }
 236 #else
 237 static void sum_val(uint64_t *dst, uint64_t *src)
 238 {
 239         if (*src) {
 240                 *dst += *src;
 241                 *src = 0;
 242         }
 243 }
 244 #endif
 245
 246 static void pthread_double_unlock(pthread_mutex_t *lock1,
 247                                   pthread_mutex_t *lock2)
 248 {
 249 #ifndef CONFIG_SFAA
 250         pthread_mutex_unlock(lock1);
 251         pthread_mutex_unlock(lock2);
 252 #endif
 253 }
 254
 255 static void pthread_double_lock(pthread_mutex_t *lock1, pthread_mutex_t *lock2)
 256 {
 257 #ifndef CONFIG_SFAA
 258         if (lock1 < lock2) {
 259                 pthread_mutex_lock(lock1);
 260                 pthread_mutex_lock(lock2);
 261         } else {
 262                 pthread_mutex_lock(lock2);
 263                 pthread_mutex_lock(lock1);
 264         }
 265 #endif
 266 }
 267
 268 static void sum_ddir(struct thread_data *dst, struct thread_data *src,
 269                      enum fio_ddir ddir)
 270 {
 271         pthread_double_lock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock);
 272
 273         sum_val(&dst->io_bytes[ddir], &src->io_bytes[ddir]);
 274         sum_val(&dst->io_blocks[ddir], &src->io_blocks[ddir]);
 275         sum_val(&dst->this_io_blocks[ddir], &src->this_io_blocks[ddir]);
 276         sum_val(&dst->this_io_bytes[ddir], &src->this_io_bytes[ddir]);
 277         sum_val(&dst->bytes_done[ddir], &src->bytes_done[ddir]);
 278         if (ddir == DDIR_READ)
 279                 sum_val(&dst->bytes_verified, &src->bytes_verified);
 280
 281         pthread_double_unlock(&dst->io_wq.stat_lock, &src->io_wq.stat_lock);
 282 }
 283
 284 static void io_workqueue_update_acct_fn(struct submit_worker *sw)
 285 {
 286         struct thread_data *src = sw->priv;
 287         struct thread_data *dst = sw->wq->td;
 288
 289         if (td_read(src))
 290                 sum_ddir(dst, src, DDIR_READ);
 291         if (td_write(src))
 292                 sum_ddir(dst, src, DDIR_WRITE);
 293         if (td_trim(src))
 294                 sum_ddir(dst, src, DDIR_TRIM);
 295
 296 }
 297
 298 static struct workqueue_ops rated_wq_ops = {
 299         .fn                     = io_workqueue_fn,
 300         .pre_sleep_flush_fn     = io_workqueue_pre_sleep_flush_fn,
 301         .pre_sleep_fn           = io_workqueue_pre_sleep_fn,
 302         .update_acct_fn         = io_workqueue_update_acct_fn,
 303         .alloc_worker_fn        = io_workqueue_alloc_fn,
 304         .free_worker_fn         = io_workqueue_free_fn,
 305         .init_worker_fn         = io_workqueue_init_worker_fn,
 306         .exit_worker_fn         = io_workqueue_exit_worker_fn,
 307 };
 308
 309 int rate_submit_init(struct thread_data *td, struct sk_out *sk_out)
 310 {
 311         if (td->o.io_submit_mode != IO_MODE_OFFLOAD)
 312                 return 0;
 313
 314         return workqueue_init(td, &td->io_wq, &rated_wq_ops, td->o.iodepth, sk_out);
 315 }
 316
 317 void rate_submit_exit(struct thread_data *td)
 318 {
 319         if (td->o.io_submit_mode != IO_MODE_OFFLOAD)
 320                 return;
 321
 322         workqueue_exit(&td->io_wq);
 323 }