io_u.c

   1 #include <unistd.h>
   2 #include <fcntl.h>
   3 #include <string.h>
   4 #include <signal.h>
   5 #include <time.h>
   6 #include <assert.h>
   7
   8 #include "fio.h"
   9
  10 /*
  11  * Change this define to play with the timeout handling
  12  */
  13 #undef FIO_USE_TIMEOUT
  14
  15 struct io_completion_data {
  16         int nr;                         /* input */
  17
  18         int error;                      /* output */
  19         unsigned long bytes_done[2];    /* output */
  20         struct timeval time;            /* output */
  21 };
  22
  23 /*
  24  * The ->file_map[] contains a map of blocks we have or have not done io
  25  * to yet. Used to make sure we cover the entire range in a fair fashion.
  26  */
  27 static int random_map_free(struct thread_data *td, struct fio_file *f,
  28                            unsigned long long block)
  29 {
  30         unsigned int idx = RAND_MAP_IDX(td, f, block);
  31         unsigned int bit = RAND_MAP_BIT(td, f, block);
  32
  33         return (f->file_map[idx] & (1UL << bit)) == 0;
  34 }
  35
  36 /*
  37  * Mark a given offset as used in the map.
  38  */
  39 static void mark_random_map(struct thread_data *td, struct io_u *io_u)
  40 {
  41         unsigned int min_bs = td->o.rw_min_bs;
  42         struct fio_file *f = io_u->file;
  43         unsigned long long block;
  44         unsigned int blocks;
  45         unsigned int nr_blocks;
  46
  47         block = io_u->offset / (unsigned long long) min_bs;
  48         blocks = 0;
  49         nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
  50
  51         while (blocks < nr_blocks) {
  52                 unsigned int idx, bit;
  53
  54                 /*
  55                  * If we have a mixed random workload, we may
  56                  * encounter blocks we already did IO to.
  57                  */
  58                 if (!td->o.ddir_nr == 1 && !random_map_free(td, f, block))
  59                         break;
  60
  61                 idx = RAND_MAP_IDX(td, f, block);
  62                 bit = RAND_MAP_BIT(td, f, block);
  63
  64                 fio_assert(td, idx < f->num_maps);
  65
  66                 f->file_map[idx] |= (1UL << bit);
  67                 block++;
  68                 blocks++;
  69         }
  70
  71         if ((blocks * min_bs) < io_u->buflen)
  72                 io_u->buflen = blocks * min_bs;
  73 }
  74
  75 /*
  76  * Return the next free block in the map.
  77  */
  78 static int get_next_free_block(struct thread_data *td, struct fio_file *f,
  79                                unsigned long long *b)
  80 {
  81         int i;
  82
  83         i = f->last_free_lookup;
  84         *b = (i * BLOCKS_PER_MAP);
  85         while ((*b) * td->o.rw_min_bs < f->real_file_size) {
  86                 if (f->file_map[i] != -1UL) {
  87                         *b += fio_ffz(f->file_map[i]);
  88                         f->last_free_lookup = i;
  89                         return 0;
  90                 }
  91
  92                 *b += BLOCKS_PER_MAP;
  93                 i++;
  94         }
  95
  96         return 1;
  97 }
  98
  99 static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 100                                 int ddir, unsigned long long *b)
 101 {
 102         unsigned long long max_blocks = f->io_size / td->o.min_bs[ddir];
 103         unsigned long long r, rb;
 104         int loops = 5;
 105
 106         do {
 107                 r = os_random_long(&td->random_state);
 108                 if (!max_blocks)
 109                         *b = 0;
 110                 else
 111                         *b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
 112                 /*
 113                  * if we are not maintaining a random map, we are done.
 114                  */
 115                 if (td->o.norandommap)
 116                         return 0;
 117
 118                 /*
 119                  * calculate map offset and chec if it's free
 120                  */
 121                 rb = *b + (f->file_offset / td->o.min_bs[ddir]);
 122                 if (random_map_free(td, f, rb))
 123                         return 0;
 124
 125         } while (--loops);
 126
 127         /*
 128          * we get here, if we didn't suceed in looking up a block. generate
 129          * a random start offset into the filemap, and find the first free
 130          * block from there.
 131          */
 132         loops = 10;
 133         do {
 134                 f->last_free_lookup = (f->num_maps - 1) * (r / (RAND_MAX+1.0));
 135                 if (!get_next_free_block(td, f, b))
 136                         return 0;
 137
 138                 r = os_random_long(&td->random_state);
 139         } while (--loops);
 140
 141         /*
 142          * that didn't work either, try exhaustive search from the start
 143          */
 144         f->last_free_lookup = 0;
 145         return get_next_free_block(td, f, b);
 146 }
 147
 148 /*
 149  * For random io, generate a random new block and see if it's used. Repeat
 150  * until we find a free one. For sequential io, just return the end of
 151  * the last io issued.
 152  */
 153 static int get_next_offset(struct thread_data *td, struct io_u *io_u)
 154 {
 155         struct fio_file *f = io_u->file;
 156         const int ddir = io_u->ddir;
 157         unsigned long long b;
 158
 159         if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
 160                 td->ddir_nr = td->o.ddir_nr;
 161
 162                 if (get_next_rand_offset(td, f, ddir, &b))
 163                         return 1;
 164         } else {
 165                 if (f->last_pos >= f->real_file_size) {
 166                         if (!td_random(td) || get_next_rand_offset(td, f, ddir, &b))
 167                                 return 1;
 168                 } else
 169                         b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
 170         }
 171
 172         io_u->offset = (b * td->o.min_bs[ddir]) + f->file_offset;
 173         if (io_u->offset >= f->real_file_size)
 174                 return 1;
 175
 176         return 0;
 177 }
 178
 179 static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
 180 {
 181         const int ddir = io_u->ddir;
 182         unsigned int buflen;
 183         long r;
 184
 185         if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
 186                 buflen = td->o.min_bs[ddir];
 187         else {
 188                 r = os_random_long(&td->bsrange_state);
 189                 if (!td->o.bssplit_nr)
 190                         buflen = (unsigned int) (1 + (double) (td->o.max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
 191                 else {
 192                         long perc = 0;
 193                         unsigned int i;
 194
 195                         for (i = 0; i < td->o.bssplit_nr; i++) {
 196                                 struct bssplit *bsp = &td->o.bssplit[i];
 197
 198                                 buflen = bsp->bs;
 199                                 perc += bsp->perc;
 200                                 if (r <= ((LONG_MAX / 100L) * perc))
 201                                         break;
 202                         }
 203                 }
 204                 if (!td->o.bs_unaligned)
 205                         buflen = (buflen + td->o.min_bs[ddir] - 1) & ~(td->o.min_bs[ddir] - 1);
 206         }
 207
 208         if (io_u->offset + buflen > io_u->file->real_file_size)
 209                 buflen = td->o.min_bs[ddir];
 210
 211         return buflen;
 212 }
 213
 214 static void set_rwmix_bytes(struct thread_data *td)
 215 {
 216         unsigned long long rbytes;
 217         unsigned int diff;
 218
 219         /*
 220          * we do time or byte based switch. this is needed because
 221          * buffered writes may issue a lot quicker than they complete,
 222          * whereas reads do not.
 223          */
 224         rbytes = td->io_bytes[td->rwmix_ddir] - td->rwmix_bytes;
 225         diff = td->o.rwmix[td->rwmix_ddir ^ 1];
 226
 227         td->rwmix_bytes = td->io_bytes[td->rwmix_ddir] + (rbytes * ((100 - diff)) / diff);
 228 }
 229
 230 static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
 231 {
 232         unsigned int v;
 233         long r;
 234
 235         r = os_random_long(&td->rwmix_state);
 236         v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
 237         if (v < td->o.rwmix[DDIR_READ])
 238                 return DDIR_READ;
 239
 240         return DDIR_WRITE;
 241 }
 242
 243 /*
 244  * Return the data direction for the next io_u. If the job is a
 245  * mixed read/write workload, check the rwmix cycle and switch if
 246  * necessary.
 247  */
 248 static enum fio_ddir get_rw_ddir(struct thread_data *td)
 249 {
 250         if (td_rw(td)) {
 251                 struct timeval now;
 252                 unsigned long elapsed;
 253                 unsigned int cycle;
 254
 255                 fio_gettime(&now, NULL);
 256                 elapsed = mtime_since_now(&td->rwmix_switch);
 257
 258                 /*
 259                  * if this is the first cycle, make it shorter
 260                  */
 261                 cycle = td->o.rwmixcycle;
 262                 if (!td->rwmix_bytes)
 263                         cycle /= 10;
 264
 265                 /*
 266                  * Check if it's time to seed a new data direction.
 267                  */
 268                 if (elapsed >= cycle ||
 269                     td->io_bytes[td->rwmix_ddir] >= td->rwmix_bytes) {
 270                         unsigned long long max_bytes;
 271                         enum fio_ddir ddir;
 272
 273                         /*
 274                          * Put a top limit on how many bytes we do for
 275                          * one data direction, to avoid overflowing the
 276                          * ranges too much
 277                          */
 278                         ddir = get_rand_ddir(td);
 279                         max_bytes = td->this_io_bytes[ddir];
 280                         if (max_bytes >= (td->o.size * td->o.rwmix[ddir] / 100)) {
 281                                 if (!td->rw_end_set[ddir]) {
 282                                         td->rw_end_set[ddir] = 1;
 283                                         memcpy(&td->rw_end[ddir], &now, sizeof(now));
 284                                 }
 285                                 ddir ^= 1;
 286                         }
 287
 288                         if (ddir != td->rwmix_ddir)
 289                                 set_rwmix_bytes(td);
 290
 291                         td->rwmix_ddir = ddir;
 292                         memcpy(&td->rwmix_switch, &now, sizeof(now));
 293                 }
 294                 return td->rwmix_ddir;
 295         } else if (td_read(td))
 296                 return DDIR_READ;
 297         else
 298                 return DDIR_WRITE;
 299 }
 300
 301 void put_io_u(struct thread_data *td, struct io_u *io_u)
 302 {
 303         assert((io_u->flags & IO_U_F_FREE) == 0);
 304         io_u->flags |= IO_U_F_FREE;
 305
 306         if (io_u->file)
 307                 put_file(td, io_u->file);
 308
 309         io_u->file = NULL;
 310         list_del(&io_u->list);
 311         list_add(&io_u->list, &td->io_u_freelist);
 312         td->cur_depth--;
 313 }
 314
 315 void requeue_io_u(struct thread_data *td, struct io_u **io_u)
 316 {
 317         struct io_u *__io_u = *io_u;
 318
 319         __io_u->flags |= IO_U_F_FREE;
 320         __io_u->flags &= ~IO_U_F_FLIGHT;
 321
 322         list_del(&__io_u->list);
 323         list_add_tail(&__io_u->list, &td->io_u_requeues);
 324         td->cur_depth--;
 325         *io_u = NULL;
 326 }
 327
 328 static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 329 {
 330         if (td->io_ops->flags & FIO_NOIO)
 331                 goto out;
 332
 333         /*
 334          * see if it's time to sync
 335          */
 336         if (td->o.fsync_blocks &&
 337            !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
 338              td->io_issues[DDIR_WRITE] && should_fsync(td)) {
 339                 io_u->ddir = DDIR_SYNC;
 340                 goto out;
 341         }
 342
 343         io_u->ddir = get_rw_ddir(td);
 344
 345         /*
 346          * See if it's time to switch to a new zone
 347          */
 348         if (td->zone_bytes >= td->o.zone_size) {
 349                 td->zone_bytes = 0;
 350                 io_u->file->last_pos += td->o.zone_skip;
 351                 td->io_skip_bytes += td->o.zone_skip;
 352         }
 353
 354         /*
 355          * No log, let the seq/rand engine retrieve the next buflen and
 356          * position.
 357          */
 358         if (get_next_offset(td, io_u))
 359                 return 1;
 360
 361         io_u->buflen = get_next_buflen(td, io_u);
 362         if (!io_u->buflen)
 363                 return 1;
 364
 365         if (io_u->offset + io_u->buflen > io_u->file->real_file_size)
 366                 return 1;
 367
 368         /*
 369          * mark entry before potentially trimming io_u
 370          */
 371         if (td_random(td) && !td->o.norandommap)
 372                 mark_random_map(td, io_u);
 373
 374         /*
 375          * If using a write iolog, store this entry.
 376          */
 377 out:
 378         log_io_u(td, io_u);
 379         return 0;
 380 }
 381
 382 void io_u_mark_depth(struct thread_data *td, struct io_u *io_u)
 383 {
 384         int index = 0;
 385
 386         if (io_u->ddir == DDIR_SYNC)
 387                 return;
 388
 389         switch (td->cur_depth) {
 390         default:
 391                 index = 6;
 392                 break;
 393         case 32 ... 63:
 394                 index = 5;
 395                 break;
 396         case 16 ... 31:
 397                 index = 4;
 398                 break;
 399         case 8 ... 15:
 400                 index = 3;
 401                 break;
 402         case 4 ... 7:
 403                 index = 2;
 404                 break;
 405         case 2 ... 3:
 406                 index = 1;
 407         case 1:
 408                 break;
 409         }
 410
 411         td->ts.io_u_map[index]++;
 412         td->ts.total_io_u[io_u->ddir]++;
 413 }
 414
 415 static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
 416 {
 417         int index = 0;
 418
 419         assert(usec < 1000);
 420
 421         switch (usec) {
 422         case 750 ... 999:
 423                 index = 9;
 424                 break;
 425         case 500 ... 749:
 426                 index = 8;
 427                 break;
 428         case 250 ... 499:
 429                 index = 7;
 430                 break;
 431         case 100 ... 249:
 432                 index = 6;
 433                 break;
 434         case 50 ... 99:
 435                 index = 5;
 436                 break;
 437         case 20 ... 49:
 438                 index = 4;
 439                 break;
 440         case 10 ... 19:
 441                 index = 3;
 442                 break;
 443         case 4 ... 9:
 444                 index = 2;
 445                 break;
 446         case 2 ... 3:
 447                 index = 1;
 448         case 0 ... 1:
 449                 break;
 450         }
 451
 452         assert(index < FIO_IO_U_LAT_U_NR);
 453         td->ts.io_u_lat_u[index]++;
 454 }
 455
 456 static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
 457 {
 458         int index = 0;
 459
 460         switch (msec) {
 461         default:
 462                 index = 11;
 463                 break;
 464         case 1000 ... 1999:
 465                 index = 10;
 466                 break;
 467         case 750 ... 999:
 468                 index = 9;
 469                 break;
 470         case 500 ... 749:
 471                 index = 8;
 472                 break;
 473         case 250 ... 499:
 474                 index = 7;
 475                 break;
 476         case 100 ... 249:
 477                 index = 6;
 478                 break;
 479         case 50 ... 99:
 480                 index = 5;
 481                 break;
 482         case 20 ... 49:
 483                 index = 4;
 484                 break;
 485         case 10 ... 19:
 486                 index = 3;
 487                 break;
 488         case 4 ... 9:
 489                 index = 2;
 490                 break;
 491         case 2 ... 3:
 492                 index = 1;
 493         case 0 ... 1:
 494                 break;
 495         }
 496
 497         assert(index < FIO_IO_U_LAT_M_NR);
 498         td->ts.io_u_lat_m[index]++;
 499 }
 500
 501 static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
 502 {
 503         if (usec < 1000)
 504                 io_u_mark_lat_usec(td, usec);
 505         else
 506                 io_u_mark_lat_msec(td, usec / 1000);
 507 }
 508
 509 /*
 510  * Get next file to service by choosing one at random
 511  */
 512 static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
 513                                            int badf)
 514 {
 515         struct fio_file *f;
 516         int fno;
 517
 518         do {
 519                 long r = os_random_long(&td->next_file_state);
 520
 521                 fno = (unsigned int) ((double) td->o.nr_files * (r / (RAND_MAX + 1.0)));
 522                 f = &td->files[fno];
 523                 if (f->flags & FIO_FILE_DONE)
 524                         continue;
 525
 526                 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
 527                         return f;
 528         } while (1);
 529 }
 530
 531 /*
 532  * Get next file to service by doing round robin between all available ones
 533  */
 534 static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
 535                                          int badf)
 536 {
 537         unsigned int old_next_file = td->next_file;
 538         struct fio_file *f;
 539
 540         do {
 541                 f = &td->files[td->next_file];
 542
 543                 td->next_file++;
 544                 if (td->next_file >= td->o.nr_files)
 545                         td->next_file = 0;
 546
 547                 if (f->flags & FIO_FILE_DONE) {
 548                         f = NULL;
 549                         continue;
 550                 }
 551
 552                 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
 553                         break;
 554
 555                 f = NULL;
 556         } while (td->next_file != old_next_file);
 557
 558         return f;
 559 }
 560
 561 static struct fio_file *get_next_file(struct thread_data *td)
 562 {
 563         struct fio_file *f;
 564
 565         assert(td->o.nr_files <= td->files_index);
 566
 567         if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
 568                 return NULL;
 569
 570         f = td->file_service_file;
 571         if (f && (f->flags & FIO_FILE_OPEN) && td->file_service_left--)
 572                 return f;
 573
 574         if (td->o.file_service_type == FIO_FSERVICE_RR)
 575                 f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
 576         else
 577                 f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
 578
 579         td->file_service_file = f;
 580         td->file_service_left = td->file_service_nr - 1;
 581         return f;
 582 }
 583
 584 static struct fio_file *find_next_new_file(struct thread_data *td)
 585 {
 586         struct fio_file *f;
 587
 588         if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
 589                 return NULL;
 590
 591         if (td->o.file_service_type == FIO_FSERVICE_RR)
 592                 f = get_next_file_rr(td, 0, FIO_FILE_OPEN);
 593         else
 594                 f = get_next_file_rand(td, 0, FIO_FILE_OPEN);
 595
 596         return f;
 597 }
 598
 599 static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
 600 {
 601         struct fio_file *f;
 602
 603         do {
 604                 f = get_next_file(td);
 605                 if (!f)
 606                         return 1;
 607
 608 set_file:
 609                 io_u->file = f;
 610                 get_file(f);
 611
 612                 if (!fill_io_u(td, io_u))
 613                         break;
 614
 615                 /*
 616                  * td_io_close() does a put_file() as well, so no need to
 617                  * do that here.
 618                  */
 619                 io_u->file = NULL;
 620                 td_io_close_file(td, f);
 621                 f->flags |= FIO_FILE_DONE;
 622                 td->nr_done_files++;
 623
 624                 /*
 625                  * probably not the right place to do this, but see
 626                  * if we need to open a new file
 627                  */
 628                 if (td->nr_open_files < td->o.open_files &&
 629                     td->o.open_files != td->o.nr_files) {
 630                         f = find_next_new_file(td);
 631
 632                         if (!f || td_io_open_file(td, f))
 633                                 return 1;
 634
 635                         goto set_file;
 636                 }
 637         } while (1);
 638
 639         return 0;
 640 }
 641
 642
 643 struct io_u *__get_io_u(struct thread_data *td)
 644 {
 645         struct io_u *io_u = NULL;
 646
 647         if (!list_empty(&td->io_u_requeues))
 648                 io_u = list_entry(td->io_u_requeues.next, struct io_u, list);
 649         else if (!queue_full(td)) {
 650                 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
 651
 652                 io_u->buflen = 0;
 653                 io_u->resid = 0;
 654                 io_u->file = NULL;
 655                 io_u->end_io = NULL;
 656         }
 657
 658         if (io_u) {
 659                 assert(io_u->flags & IO_U_F_FREE);
 660                 io_u->flags &= ~IO_U_F_FREE;
 661
 662                 io_u->error = 0;
 663                 list_del(&io_u->list);
 664                 list_add(&io_u->list, &td->io_u_busylist);
 665                 td->cur_depth++;
 666         }
 667
 668         return io_u;
 669 }
 670
 671 /*
 672  * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 673  * etc. The returned io_u is fully ready to be prepped and submitted.
 674  */
 675 struct io_u *get_io_u(struct thread_data *td)
 676 {
 677         struct fio_file *f;
 678         struct io_u *io_u;
 679
 680         io_u = __get_io_u(td);
 681         if (!io_u)
 682                 return NULL;
 683
 684         /*
 685          * from a requeue, io_u already setup
 686          */
 687         if (io_u->file)
 688                 goto out;
 689
 690         /*
 691          * If using an iolog, grab next piece if any available.
 692          */
 693         if (td->o.read_iolog_file) {
 694                 if (read_iolog_get(td, io_u))
 695                         goto err_put;
 696         } else if (set_io_u_file(td, io_u))
 697                 goto err_put;
 698
 699         f = io_u->file;
 700         assert(f->flags & FIO_FILE_OPEN);
 701
 702         if (io_u->ddir != DDIR_SYNC) {
 703                 if (!io_u->buflen)
 704                         goto err_put;
 705
 706                 f->last_pos = io_u->offset + io_u->buflen;
 707
 708                 if (td->o.verify != VERIFY_NONE)
 709                         populate_verify_io_u(td, io_u);
 710         }
 711
 712         /*
 713          * Set io data pointers.
 714          */
 715         io_u->endpos = io_u->offset + io_u->buflen;
 716 out:
 717         io_u->xfer_buf = io_u->buf;
 718         io_u->xfer_buflen = io_u->buflen;
 719
 720         if (!td_io_prep(td, io_u)) {
 721                 fio_gettime(&io_u->start_time, NULL);
 722                 return io_u;
 723         }
 724 err_put:
 725         put_io_u(td, io_u);
 726         return NULL;
 727 }
 728
 729 void io_u_log_error(struct thread_data *td, struct io_u *io_u)
 730 {
 731         const char *msg[] = { "read", "write", "sync" };
 732
 733         log_err("fio: io_u error");
 734
 735         if (io_u->file)
 736                 log_err(" on file %s", io_u->file->file_name);
 737
 738         log_err(": %s\n", strerror(io_u->error));
 739
 740         log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir], io_u->offset, io_u->xfer_buflen);
 741
 742         if (!td->error)
 743                 td_verror(td, io_u->error, "io_u error");
 744 }
 745
 746 static void io_completed(struct thread_data *td, struct io_u *io_u,
 747                          struct io_completion_data *icd)
 748 {
 749         unsigned long usec;
 750
 751         assert(io_u->flags & IO_U_F_FLIGHT);
 752         io_u->flags &= ~IO_U_F_FLIGHT;
 753
 754         if (io_u->ddir == DDIR_SYNC) {
 755                 td->last_was_sync = 1;
 756                 return;
 757         }
 758
 759         td->last_was_sync = 0;
 760
 761         if (!io_u->error) {
 762                 unsigned int bytes = io_u->buflen - io_u->resid;
 763                 const enum fio_ddir idx = io_u->ddir;
 764                 int ret;
 765
 766                 td->io_blocks[idx]++;
 767                 td->io_bytes[idx] += bytes;
 768                 td->zone_bytes += bytes;
 769                 td->this_io_bytes[idx] += bytes;
 770
 771                 io_u->file->last_completed_pos = io_u->endpos;
 772
 773                 usec = utime_since(&io_u->issue_time, &icd->time);
 774
 775                 add_clat_sample(td, idx, usec);
 776                 add_bw_sample(td, idx, &icd->time);
 777                 io_u_mark_latency(td, usec);
 778
 779                 if (td_write(td) && idx == DDIR_WRITE &&
 780                     td->o.do_verify &&
 781                     td->o.verify != VERIFY_NONE)
 782                         log_io_piece(td, io_u);
 783
 784                 icd->bytes_done[idx] += bytes;
 785
 786                 if (io_u->end_io) {
 787                         ret = io_u->end_io(td, io_u);
 788                         if (ret && !icd->error)
 789                                 icd->error = ret;
 790                 }
 791         } else {
 792                 icd->error = io_u->error;
 793                 io_u_log_error(td, io_u);
 794         }
 795 }
 796
 797 static void init_icd(struct io_completion_data *icd, int nr)
 798 {
 799         fio_gettime(&icd->time, NULL);
 800
 801         icd->nr = nr;
 802
 803         icd->error = 0;
 804         icd->bytes_done[0] = icd->bytes_done[1] = 0;
 805 }
 806
 807 static void ios_completed(struct thread_data *td,
 808                           struct io_completion_data *icd)
 809 {
 810         struct io_u *io_u;
 811         int i;
 812
 813         for (i = 0; i < icd->nr; i++) {
 814                 io_u = td->io_ops->event(td, i);
 815
 816                 io_completed(td, io_u, icd);
 817                 put_io_u(td, io_u);
 818         }
 819 }
 820
 821 /*
 822  * Complete a single io_u for the sync engines.
 823  */
 824 long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
 825 {
 826         struct io_completion_data icd;
 827
 828         init_icd(&icd, 1);
 829         io_completed(td, io_u, &icd);
 830         put_io_u(td, io_u);
 831
 832         if (!icd.error)
 833                 return icd.bytes_done[0] + icd.bytes_done[1];
 834
 835         td_verror(td, icd.error, "io_u_sync_complete");
 836         return -1;
 837 }
 838
 839 /*
 840  * Called to complete min_events number of io for the async engines.
 841  */
 842 long io_u_queued_complete(struct thread_data *td, int min_events)
 843 {
 844         struct io_completion_data icd;
 845         struct timespec *tvp = NULL;
 846         int ret;
 847         struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
 848
 849         if (!min_events)
 850                 tvp = &ts;
 851
 852         ret = td_io_getevents(td, min_events, td->cur_depth, tvp);
 853         if (ret < 0) {
 854                 td_verror(td, -ret, "td_io_getevents");
 855                 return ret;
 856         } else if (!ret)
 857                 return ret;
 858
 859         init_icd(&icd, ret);
 860         ios_completed(td, &icd);
 861         if (!icd.error)
 862                 return icd.bytes_done[0] + icd.bytes_done[1];
 863
 864         td_verror(td, icd.error, "io_u_queued_complete");
 865         return -1;
 866 }
 867
 868 /*
 869  * Call when io_u is really queued, to update the submission latency.
 870  */
 871 void io_u_queued(struct thread_data *td, struct io_u *io_u)
 872 {
 873         unsigned long slat_time;
 874
 875         slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
 876         add_slat_sample(td, io_u->ddir, slat_time);
 877 }
 878
 879 #ifdef FIO_USE_TIMEOUT
 880 void io_u_set_timeout(struct thread_data *td)
 881 {
 882         assert(td->cur_depth);
 883
 884         td->timer.it_interval.tv_sec = 0;
 885         td->timer.it_interval.tv_usec = 0;
 886         td->timer.it_value.tv_sec = IO_U_TIMEOUT + IO_U_TIMEOUT_INC;
 887         td->timer.it_value.tv_usec = 0;
 888         setitimer(ITIMER_REAL, &td->timer, NULL);
 889         fio_gettime(&td->timeout_end, NULL);
 890 }
 891
 892 static void io_u_dump(struct io_u *io_u)
 893 {
 894         unsigned long t_start = mtime_since_now(&io_u->start_time);
 895         unsigned long t_issue = mtime_since_now(&io_u->issue_time);
 896
 897         log_err("io_u=%p, t_start=%lu, t_issue=%lu\n", io_u, t_start, t_issue);
 898         log_err("  buf=%p/%p, len=%lu/%lu, offset=%llu\n", io_u->buf, io_u->xfer_buf, io_u->buflen, io_u->xfer_buflen, io_u->offset);
 899         log_err("  ddir=%d, fname=%s\n", io_u->ddir, io_u->file->file_name);
 900 }
 901 #else
 902 void io_u_set_timeout(struct thread_data fio_unused *td)
 903 {
 904 }
 905 #endif
 906
 907 #ifdef FIO_USE_TIMEOUT
 908 static void io_u_timeout_handler(int fio_unused sig)
 909 {
 910         struct thread_data *td, *__td;
 911         pid_t pid = getpid();
 912         struct list_head *entry;
 913         struct io_u *io_u;
 914         int i;
 915
 916         log_err("fio: io_u timeout\n");
 917
 918         /*
 919          * TLS would be nice...
 920          */
 921         td = NULL;
 922         for_each_td(__td, i) {
 923                 if (__td->pid == pid) {
 924                         td = __td;
 925                         break;
 926                 }
 927         }
 928
 929         if (!td) {
 930                 log_err("fio: io_u timeout, can't find job\n");
 931                 exit(1);
 932         }
 933
 934         if (!td->cur_depth) {
 935                 log_err("fio: timeout without pending work?\n");
 936                 return;
 937         }
 938
 939         log_err("fio: io_u timeout: job=%s, pid=%d\n", td->o.name, td->pid);
 940
 941         list_for_each(entry, &td->io_u_busylist) {
 942                 io_u = list_entry(entry, struct io_u, list);
 943
 944                 io_u_dump(io_u);
 945         }
 946
 947         td_verror(td, ETIMEDOUT, "io_u timeout");
 948         exit(1);
 949 }
 950 #endif
 951
 952 void io_u_init_timeout(void)
 953 {
 954 #ifdef FIO_USE_TIMEOUT
 955         signal(SIGALRM, io_u_timeout_handler);
 956 #endif
 957 }