io_u.c

   1 #include <unistd.h>
   2 #include <fcntl.h>
   3 #include <string.h>
   4 #include <signal.h>
   5 #include <time.h>
   6 #include <assert.h>
   7
   8 #include "fio.h"
   9
  10 /*
  11  * Change this define to play with the timeout handling
  12  */
  13 #undef FIO_USE_TIMEOUT
  14
  15 struct io_completion_data {
  16         int nr;                         /* input */
  17
  18         int error;                      /* output */
  19         unsigned long bytes_done[2];    /* output */
  20         struct timeval time;            /* output */
  21 };
  22
  23 /*
  24  * The ->file_map[] contains a map of blocks we have or have not done io
  25  * to yet. Used to make sure we cover the entire range in a fair fashion.
  26  */
  27 static int random_map_free(struct thread_data *td, struct fio_file *f,
  28                            const unsigned long long block)
  29 {
  30         unsigned int idx = RAND_MAP_IDX(td, f, block);
  31         unsigned int bit = RAND_MAP_BIT(td, f, block);
  32
  33         dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);
  34
  35         return (f->file_map[idx] & (1UL << bit)) == 0;
  36 }
  37
  38 /*
  39  * Mark a given offset as used in the map.
  40  */
  41 static void mark_random_map(struct thread_data *td, struct io_u *io_u)
  42 {
  43         unsigned int min_bs = td->o.rw_min_bs;
  44         struct fio_file *f = io_u->file;
  45         unsigned long long block;
  46         unsigned int blocks;
  47         unsigned int nr_blocks;
  48
  49         block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
  50         blocks = 0;
  51         nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
  52
  53         while (blocks < nr_blocks) {
  54                 unsigned int idx, bit;
  55
  56                 /*
  57                  * If we have a mixed random workload, we may
  58                  * encounter blocks we already did IO to.
  59                  */
  60                 if ((td->o.ddir_nr == 1) && !random_map_free(td, f, block))
  61                         break;
  62
  63                 idx = RAND_MAP_IDX(td, f, block);
  64                 bit = RAND_MAP_BIT(td, f, block);
  65
  66                 fio_assert(td, idx < f->num_maps);
  67
  68                 f->file_map[idx] |= (1UL << bit);
  69                 block++;
  70                 blocks++;
  71         }
  72
  73         if ((blocks * min_bs) < io_u->buflen)
  74                 io_u->buflen = blocks * min_bs;
  75 }
  76
  77 static inline unsigned long long last_block(struct thread_data *td,
  78                                             struct fio_file *f,
  79                                             enum fio_ddir ddir)
  80 {
  81         unsigned long long max_blocks;
  82
  83         max_blocks = f->io_size / (unsigned long long) td->o.min_bs[ddir];
  84         if (!max_blocks)
  85                 return 0;
  86
  87         return max_blocks - 1;
  88 }
  89
  90 /*
  91  * Return the next free block in the map.
  92  */
  93 static int get_next_free_block(struct thread_data *td, struct fio_file *f,
  94                                enum fio_ddir ddir, unsigned long long *b)
  95 {
  96         unsigned long long min_bs = td->o.rw_min_bs;
  97         int i;
  98
  99         i = f->last_free_lookup;
 100         *b = (i * BLOCKS_PER_MAP);
 101         while ((*b) * min_bs < f->real_file_size) {
 102                 if (f->file_map[i] != -1UL) {
 103                         *b += fio_ffz(f->file_map[i]);
 104                         if (*b > last_block(td, f, ddir))
 105                                 break;
 106                         f->last_free_lookup = i;
 107                         return 0;
 108                 }
 109
 110                 *b += BLOCKS_PER_MAP;
 111                 i++;
 112         }
 113
 114         dprint(FD_IO, "failed finding a free block\n");
 115         return 1;
 116 }
 117
 118 static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 119                                 enum fio_ddir ddir, unsigned long long *b)
 120 {
 121         unsigned long long r;
 122         int loops = 5;
 123
 124         do {
 125                 r = os_random_long(&td->random_state);
 126                 dprint(FD_RANDOM, "off rand %llu\n", r);
 127                 *b = (last_block(td, f, ddir) - 1)
 128                         * (r / ((unsigned long long) RAND_MAX + 1.0));
 129
 130                 /*
 131                  * if we are not maintaining a random map, we are done.
 132                  */
 133                 if (td->o.norandommap)
 134                         return 0;
 135
 136                 /*
 137                  * calculate map offset and check if it's free
 138                  */
 139                 if (random_map_free(td, f, *b))
 140                         return 0;
 141
 142                 dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
 143                                                                         *b);
 144         } while (--loops);
 145
 146         /*
 147          * we get here, if we didn't suceed in looking up a block. generate
 148          * a random start offset into the filemap, and find the first free
 149          * block from there.
 150          */
 151         loops = 10;
 152         do {
 153                 f->last_free_lookup = (f->num_maps - 1) * (r / (RAND_MAX+1.0));
 154                 if (!get_next_free_block(td, f, ddir, b))
 155                         return 0;
 156
 157                 r = os_random_long(&td->random_state);
 158         } while (--loops);
 159
 160         /*
 161          * that didn't work either, try exhaustive search from the start
 162          */
 163         f->last_free_lookup = 0;
 164         return get_next_free_block(td, f, ddir, b);
 165 }
 166
 167 /*
 168  * For random io, generate a random new block and see if it's used. Repeat
 169  * until we find a free one. For sequential io, just return the end of
 170  * the last io issued.
 171  */
 172 static int get_next_offset(struct thread_data *td, struct io_u *io_u)
 173 {
 174         struct fio_file *f = io_u->file;
 175         unsigned long long b;
 176         enum fio_ddir ddir = io_u->ddir;
 177
 178         if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
 179                 td->ddir_nr = td->o.ddir_nr;
 180
 181                 if (get_next_rand_offset(td, f, ddir, &b))
 182                         return 1;
 183         } else {
 184                 if (f->last_pos >= f->real_file_size) {
 185                         if (!td_random(td) ||
 186                              get_next_rand_offset(td, f, ddir, &b))
 187                                 return 1;
 188                 } else
 189                         b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
 190         }
 191
 192         io_u->offset = (b * td->o.min_bs[ddir]) + f->file_offset;
 193         if (io_u->offset >= f->real_file_size) {
 194                 dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
 195                                         io_u->offset, f->real_file_size);
 196                 return 1;
 197         }
 198
 199         return 0;
 200 }
 201
 202 static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
 203 {
 204         const int ddir = io_u->ddir;
 205         unsigned int buflen;
 206         long r;
 207
 208         if (td->o.min_bs[ddir] == td->o.max_bs[ddir])
 209                 buflen = td->o.min_bs[ddir];
 210         else {
 211                 r = os_random_long(&td->bsrange_state);
 212                 if (!td->o.bssplit_nr) {
 213                         buflen = (unsigned int)
 214                                         (1 + (double) (td->o.max_bs[ddir] - 1)
 215                                         * r / (RAND_MAX + 1.0));
 216                 } else {
 217                         long perc = 0;
 218                         unsigned int i;
 219
 220                         for (i = 0; i < td->o.bssplit_nr; i++) {
 221                                 struct bssplit *bsp = &td->o.bssplit[i];
 222
 223                                 buflen = bsp->bs;
 224                                 perc += bsp->perc;
 225                                 if (r <= ((LONG_MAX / 100L) * perc))
 226                                         break;
 227                         }
 228                 }
 229                 if (!td->o.bs_unaligned) {
 230                         buflen = (buflen + td->o.min_bs[ddir] - 1)
 231                                         & ~(td->o.min_bs[ddir] - 1);
 232                 }
 233         }
 234
 235         if (io_u->offset + buflen > io_u->file->real_file_size) {
 236                 dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
 237                                                 td->o.min_bs[ddir], ddir);
 238                 buflen = td->o.min_bs[ddir];
 239         }
 240
 241         return buflen;
 242 }
 243
 244 static void set_rwmix_bytes(struct thread_data *td)
 245 {
 246         unsigned long long rbytes;
 247         unsigned int diff;
 248
 249         /*
 250          * we do time or byte based switch. this is needed because
 251          * buffered writes may issue a lot quicker than they complete,
 252          * whereas reads do not.
 253          */
 254         rbytes = td->io_bytes[td->rwmix_ddir] - td->rwmix_bytes;
 255         diff = td->o.rwmix[td->rwmix_ddir ^ 1];
 256
 257         td->rwmix_bytes = td->io_bytes[td->rwmix_ddir]
 258                                 + (rbytes * ((100 - diff)) / diff);
 259 }
 260
 261 static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
 262 {
 263         unsigned int v;
 264         long r;
 265
 266         r = os_random_long(&td->rwmix_state);
 267         v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
 268         if (v < td->o.rwmix[DDIR_READ])
 269                 return DDIR_READ;
 270
 271         return DDIR_WRITE;
 272 }
 273
 274 /*
 275  * Return the data direction for the next io_u. If the job is a
 276  * mixed read/write workload, check the rwmix cycle and switch if
 277  * necessary.
 278  */
 279 static enum fio_ddir get_rw_ddir(struct thread_data *td)
 280 {
 281         if (td_rw(td)) {
 282                 struct timeval now;
 283                 unsigned long elapsed;
 284                 unsigned int cycle;
 285
 286                 fio_gettime(&now, NULL);
 287                 elapsed = mtime_since_now(&td->rwmix_switch);
 288
 289                 /*
 290                  * if this is the first cycle, make it shorter
 291                  */
 292                 cycle = td->o.rwmixcycle;
 293                 if (!td->rwmix_bytes)
 294                         cycle /= 10;
 295
 296                 /*
 297                  * Check if it's time to seed a new data direction.
 298                  */
 299                 if (elapsed >= cycle ||
 300                     td->io_bytes[td->rwmix_ddir] >= td->rwmix_bytes) {
 301                         unsigned long long max_bytes;
 302                         enum fio_ddir ddir;
 303
 304                         /*
 305                          * Put a top limit on how many bytes we do for
 306                          * one data direction, to avoid overflowing the
 307                          * ranges too much
 308                          */
 309                         ddir = get_rand_ddir(td);
 310                         max_bytes = td->this_io_bytes[ddir];
 311                         if (max_bytes >=
 312                             (td->o.size * td->o.rwmix[ddir] / 100)) {
 313                                 if (!td->rw_end_set[ddir]) {
 314                                         td->rw_end_set[ddir] = 1;
 315                                         memcpy(&td->rw_end[ddir], &now,
 316                                                 sizeof(now));
 317                                 }
 318                                 ddir ^= 1;
 319                         }
 320
 321                         if (ddir != td->rwmix_ddir)
 322                                 set_rwmix_bytes(td);
 323
 324                         td->rwmix_ddir = ddir;
 325                         memcpy(&td->rwmix_switch, &now, sizeof(now));
 326                 }
 327                 return td->rwmix_ddir;
 328         } else if (td_read(td))
 329                 return DDIR_READ;
 330         else
 331                 return DDIR_WRITE;
 332 }
 333
 334 void put_io_u(struct thread_data *td, struct io_u *io_u)
 335 {
 336         assert((io_u->flags & IO_U_F_FREE) == 0);
 337         io_u->flags |= IO_U_F_FREE;
 338
 339         if (io_u->file) {
 340                 int ret = put_file(td, io_u->file);
 341
 342                 if (ret)
 343                         td_verror(td, ret, "file close");
 344         }
 345
 346         io_u->file = NULL;
 347         list_del(&io_u->list);
 348         list_add(&io_u->list, &td->io_u_freelist);
 349         td->cur_depth--;
 350 }
 351
 352 void requeue_io_u(struct thread_data *td, struct io_u **io_u)
 353 {
 354         struct io_u *__io_u = *io_u;
 355
 356         __io_u->flags |= IO_U_F_FREE;
 357         if ((__io_u->flags & IO_U_F_FLIGHT) && (__io_u->ddir != DDIR_SYNC))
 358                 td->io_issues[__io_u->ddir]--;
 359
 360         __io_u->flags &= ~IO_U_F_FLIGHT;
 361
 362         list_del(&__io_u->list);
 363         list_add_tail(&__io_u->list, &td->io_u_requeues);
 364         td->cur_depth--;
 365         *io_u = NULL;
 366 }
 367
 368 static int fill_io_u(struct thread_data *td, struct io_u *io_u)
 369 {
 370         if (td->io_ops->flags & FIO_NOIO)
 371                 goto out;
 372
 373         /*
 374          * see if it's time to sync
 375          */
 376         if (td->o.fsync_blocks &&
 377            !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
 378              td->io_issues[DDIR_WRITE] && should_fsync(td)) {
 379                 io_u->ddir = DDIR_SYNC;
 380                 goto out;
 381         }
 382
 383         io_u->ddir = get_rw_ddir(td);
 384
 385         /*
 386          * See if it's time to switch to a new zone
 387          */
 388         if (td->zone_bytes >= td->o.zone_size) {
 389                 td->zone_bytes = 0;
 390                 io_u->file->last_pos += td->o.zone_skip;
 391                 td->io_skip_bytes += td->o.zone_skip;
 392         }
 393
 394         /*
 395          * No log, let the seq/rand engine retrieve the next buflen and
 396          * position.
 397          */
 398         if (get_next_offset(td, io_u)) {
 399                 dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
 400                 return 1;
 401         }
 402
 403         io_u->buflen = get_next_buflen(td, io_u);
 404         if (!io_u->buflen) {
 405                 dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
 406                 return 1;
 407         }
 408
 409         if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
 410                 dprint(FD_IO, "io_u %p, offset too large\n", io_u);
 411                 dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
 412                                 io_u->buflen, io_u->file->real_file_size);
 413                 return 1;
 414         }
 415
 416         /*
 417          * mark entry before potentially trimming io_u
 418          */
 419         if (td_random(td) && !td->o.norandommap)
 420                 mark_random_map(td, io_u);
 421
 422         /*
 423          * If using a write iolog, store this entry.
 424          */
 425 out:
 426         dprint_io_u(io_u, "fill_io_u");
 427         td->zone_bytes += io_u->buflen;
 428         log_io_u(td, io_u);
 429         return 0;
 430 }
 431
 432 void io_u_mark_depth(struct thread_data *td, struct io_u *io_u)
 433 {
 434         int index = 0;
 435
 436         if (io_u->ddir == DDIR_SYNC)
 437                 return;
 438
 439         switch (td->cur_depth) {
 440         default:
 441                 index = 6;
 442                 break;
 443         case 32 ... 63:
 444                 index = 5;
 445                 break;
 446         case 16 ... 31:
 447                 index = 4;
 448                 break;
 449         case 8 ... 15:
 450                 index = 3;
 451                 break;
 452         case 4 ... 7:
 453                 index = 2;
 454                 break;
 455         case 2 ... 3:
 456                 index = 1;
 457         case 1:
 458                 break;
 459         }
 460
 461         td->ts.io_u_map[index]++;
 462         td->ts.total_io_u[io_u->ddir]++;
 463 }
 464
 465 static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
 466 {
 467         int index = 0;
 468
 469         assert(usec < 1000);
 470
 471         switch (usec) {
 472         case 750 ... 999:
 473                 index = 9;
 474                 break;
 475         case 500 ... 749:
 476                 index = 8;
 477                 break;
 478         case 250 ... 499:
 479                 index = 7;
 480                 break;
 481         case 100 ... 249:
 482                 index = 6;
 483                 break;
 484         case 50 ... 99:
 485                 index = 5;
 486                 break;
 487         case 20 ... 49:
 488                 index = 4;
 489                 break;
 490         case 10 ... 19:
 491                 index = 3;
 492                 break;
 493         case 4 ... 9:
 494                 index = 2;
 495                 break;
 496         case 2 ... 3:
 497                 index = 1;
 498         case 0 ... 1:
 499                 break;
 500         }
 501
 502         assert(index < FIO_IO_U_LAT_U_NR);
 503         td->ts.io_u_lat_u[index]++;
 504 }
 505
 506 static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
 507 {
 508         int index = 0;
 509
 510         switch (msec) {
 511         default:
 512                 index = 11;
 513                 break;
 514         case 1000 ... 1999:
 515                 index = 10;
 516                 break;
 517         case 750 ... 999:
 518                 index = 9;
 519                 break;
 520         case 500 ... 749:
 521                 index = 8;
 522                 break;
 523         case 250 ... 499:
 524                 index = 7;
 525                 break;
 526         case 100 ... 249:
 527                 index = 6;
 528                 break;
 529         case 50 ... 99:
 530                 index = 5;
 531                 break;
 532         case 20 ... 49:
 533                 index = 4;
 534                 break;
 535         case 10 ... 19:
 536                 index = 3;
 537                 break;
 538         case 4 ... 9:
 539                 index = 2;
 540                 break;
 541         case 2 ... 3:
 542                 index = 1;
 543         case 0 ... 1:
 544                 break;
 545         }
 546
 547         assert(index < FIO_IO_U_LAT_M_NR);
 548         td->ts.io_u_lat_m[index]++;
 549 }
 550
 551 static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
 552 {
 553         if (usec < 1000)
 554                 io_u_mark_lat_usec(td, usec);
 555         else
 556                 io_u_mark_lat_msec(td, usec / 1000);
 557 }
 558
 559 /*
 560  * Get next file to service by choosing one at random
 561  */
 562 static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
 563                                            int badf)
 564 {
 565         struct fio_file *f;
 566         int fno;
 567
 568         do {
 569                 long r = os_random_long(&td->next_file_state);
 570
 571                 fno = (unsigned int) ((double) td->o.nr_files
 572                         * (r / (RAND_MAX + 1.0)));
 573                 f = td->files[fno];
 574                 if (f->flags & FIO_FILE_DONE)
 575                         continue;
 576
 577                 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
 578                         dprint(FD_FILE, "get_next_file_rand: %p\n", f);
 579                         return f;
 580                 }
 581         } while (1);
 582 }
 583
 584 /*
 585  * Get next file to service by doing round robin between all available ones
 586  */
 587 static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
 588                                          int badf)
 589 {
 590         unsigned int old_next_file = td->next_file;
 591         struct fio_file *f;
 592
 593         do {
 594                 f = td->files[td->next_file];
 595
 596                 td->next_file++;
 597                 if (td->next_file >= td->o.nr_files)
 598                         td->next_file = 0;
 599
 600                 if (f->flags & FIO_FILE_DONE) {
 601                         f = NULL;
 602                         continue;
 603                 }
 604
 605                 if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
 606                         break;
 607
 608                 f = NULL;
 609         } while (td->next_file != old_next_file);
 610
 611         dprint(FD_FILE, "get_next_file_rr: %p\n", f);
 612         return f;
 613 }
 614
 615 static struct fio_file *get_next_file(struct thread_data *td)
 616 {
 617         struct fio_file *f;
 618
 619         assert(td->o.nr_files <= td->files_index);
 620
 621         if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files) {
 622                 dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
 623                                 " nr_files=%d\n", td->nr_open_files,
 624                                                   td->nr_done_files,
 625                                                   td->o.nr_files);
 626                 return NULL;
 627         }
 628
 629         f = td->file_service_file;
 630         if (f && (f->flags & FIO_FILE_OPEN) && td->file_service_left--)
 631                 goto out;
 632
 633         if (td->o.file_service_type == FIO_FSERVICE_RR)
 634                 f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
 635         else
 636                 f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
 637
 638         td->file_service_file = f;
 639         td->file_service_left = td->file_service_nr - 1;
 640 out:
 641         dprint(FD_FILE, "get_next_file: %p\n", f);
 642         return f;
 643 }
 644
 645 static struct fio_file *find_next_new_file(struct thread_data *td)
 646 {
 647         struct fio_file *f;
 648
 649         if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
 650                 return NULL;
 651
 652         if (td->o.file_service_type == FIO_FSERVICE_RR)
 653                 f = get_next_file_rr(td, 0, FIO_FILE_OPEN);
 654         else
 655                 f = get_next_file_rand(td, 0, FIO_FILE_OPEN);
 656
 657         return f;
 658 }
 659
 660 static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
 661 {
 662         struct fio_file *f;
 663
 664         do {
 665                 f = get_next_file(td);
 666                 if (!f)
 667                         return 1;
 668
 669 set_file:
 670                 io_u->file = f;
 671                 get_file(f);
 672
 673                 if (!fill_io_u(td, io_u))
 674                         break;
 675
 676                 /*
 677                  * td_io_close() does a put_file() as well, so no need to
 678                  * do that here.
 679                  */
 680                 io_u->file = NULL;
 681                 td_io_close_file(td, f);
 682                 f->flags |= FIO_FILE_DONE;
 683                 td->nr_done_files++;
 684
 685                 /*
 686                  * probably not the right place to do this, but see
 687                  * if we need to open a new file
 688                  */
 689                 if (td->nr_open_files < td->o.open_files &&
 690                     td->o.open_files != td->o.nr_files) {
 691                         f = find_next_new_file(td);
 692
 693                         if (!f || td_io_open_file(td, f))
 694                                 return 1;
 695
 696                         goto set_file;
 697                 }
 698         } while (1);
 699
 700         return 0;
 701 }
 702
 703
 704 struct io_u *__get_io_u(struct thread_data *td)
 705 {
 706         struct io_u *io_u = NULL;
 707
 708         if (!list_empty(&td->io_u_requeues))
 709                 io_u = list_entry(td->io_u_requeues.next, struct io_u, list);
 710         else if (!queue_full(td)) {
 711                 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
 712
 713                 io_u->buflen = 0;
 714                 io_u->resid = 0;
 715                 io_u->file = NULL;
 716                 io_u->end_io = NULL;
 717         }
 718
 719         if (io_u) {
 720                 assert(io_u->flags & IO_U_F_FREE);
 721                 io_u->flags &= ~IO_U_F_FREE;
 722
 723                 io_u->error = 0;
 724                 list_del(&io_u->list);
 725                 list_add(&io_u->list, &td->io_u_busylist);
 726                 td->cur_depth++;
 727         }
 728
 729         return io_u;
 730 }
 731
 732 /*
 733  * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 734  * etc. The returned io_u is fully ready to be prepped and submitted.
 735  */
 736 struct io_u *get_io_u(struct thread_data *td)
 737 {
 738         struct fio_file *f;
 739         struct io_u *io_u;
 740
 741         io_u = __get_io_u(td);
 742         if (!io_u) {
 743                 dprint(FD_IO, "__get_io_u failed\n");
 744                 return NULL;
 745         }
 746
 747         /*
 748          * from a requeue, io_u already setup
 749          */
 750         if (io_u->file)
 751                 goto out;
 752
 753         /*
 754          * If using an iolog, grab next piece if any available.
 755          */
 756         if (td->o.read_iolog_file) {
 757                 if (read_iolog_get(td, io_u))
 758                         goto err_put;
 759         } else if (set_io_u_file(td, io_u)) {
 760                 dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
 761                 goto err_put;
 762         }
 763
 764         f = io_u->file;
 765         assert(f->flags & FIO_FILE_OPEN);
 766
 767         if (io_u->ddir != DDIR_SYNC) {
 768                 if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
 769                         dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
 770                         goto err_put;
 771                 }
 772
 773                 f->last_pos = io_u->offset + io_u->buflen;
 774
 775                 if (td->o.verify != VERIFY_NONE)
 776                         populate_verify_io_u(td, io_u);
 777         }
 778
 779         /*
 780          * Set io data pointers.
 781          */
 782         io_u->endpos = io_u->offset + io_u->buflen;
 783         io_u->xfer_buf = io_u->buf;
 784         io_u->xfer_buflen = io_u->buflen;
 785 out:
 786         if (!td_io_prep(td, io_u)) {
 787                 fio_gettime(&io_u->start_time, NULL);
 788                 return io_u;
 789         }
 790 err_put:
 791         dprint(FD_IO, "get_io_u failed\n");
 792         put_io_u(td, io_u);
 793         return NULL;
 794 }
 795
 796 void io_u_log_error(struct thread_data *td, struct io_u *io_u)
 797 {
 798         const char *msg[] = { "read", "write", "sync" };
 799
 800         log_err("fio: io_u error");
 801
 802         if (io_u->file)
 803                 log_err(" on file %s", io_u->file->file_name);
 804
 805         log_err(": %s\n", strerror(io_u->error));
 806
 807         log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
 808                                         io_u->offset, io_u->xfer_buflen);
 809
 810         if (!td->error)
 811                 td_verror(td, io_u->error, "io_u error");
 812 }
 813
 814 static void io_completed(struct thread_data *td, struct io_u *io_u,
 815                          struct io_completion_data *icd)
 816 {
 817         unsigned long usec;
 818
 819         dprint_io_u(io_u, "io complete");
 820
 821         assert(io_u->flags & IO_U_F_FLIGHT);
 822         io_u->flags &= ~IO_U_F_FLIGHT;
 823
 824         if (io_u->ddir == DDIR_SYNC) {
 825                 td->last_was_sync = 1;
 826                 return;
 827         }
 828
 829         td->last_was_sync = 0;
 830
 831         if (!io_u->error) {
 832                 unsigned int bytes = io_u->buflen - io_u->resid;
 833                 const enum fio_ddir idx = io_u->ddir;
 834                 int ret;
 835
 836                 td->io_blocks[idx]++;
 837                 td->io_bytes[idx] += bytes;
 838                 td->this_io_bytes[idx] += bytes;
 839
 840                 usec = utime_since(&io_u->issue_time, &icd->time);
 841
 842                 add_clat_sample(td, idx, usec);
 843                 add_bw_sample(td, idx, &icd->time);
 844                 io_u_mark_latency(td, usec);
 845
 846                 if (td_write(td) && idx == DDIR_WRITE &&
 847                     td->o.do_verify &&
 848                     td->o.verify != VERIFY_NONE)
 849                         log_io_piece(td, io_u);
 850
 851                 icd->bytes_done[idx] += bytes;
 852
 853                 if (io_u->end_io) {
 854                         ret = io_u->end_io(td, io_u);
 855                         if (ret && !icd->error)
 856                                 icd->error = ret;
 857                 }
 858         } else {
 859                 icd->error = io_u->error;
 860                 io_u_log_error(td, io_u);
 861         }
 862 }
 863
 864 static void init_icd(struct io_completion_data *icd, int nr)
 865 {
 866         fio_gettime(&icd->time, NULL);
 867
 868         icd->nr = nr;
 869
 870         icd->error = 0;
 871         icd->bytes_done[0] = icd->bytes_done[1] = 0;
 872 }
 873
 874 static void ios_completed(struct thread_data *td,
 875                           struct io_completion_data *icd)
 876 {
 877         struct io_u *io_u;
 878         int i;
 879
 880         for (i = 0; i < icd->nr; i++) {
 881                 io_u = td->io_ops->event(td, i);
 882
 883                 io_completed(td, io_u, icd);
 884                 put_io_u(td, io_u);
 885         }
 886 }
 887
 888 /*
 889  * Complete a single io_u for the sync engines.
 890  */
 891 long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
 892 {
 893         struct io_completion_data icd;
 894
 895         init_icd(&icd, 1);
 896         io_completed(td, io_u, &icd);
 897         put_io_u(td, io_u);
 898
 899         if (!icd.error)
 900                 return icd.bytes_done[0] + icd.bytes_done[1];
 901
 902         td_verror(td, icd.error, "io_u_sync_complete");
 903         return -1;
 904 }
 905
 906 /*
 907  * Called to complete min_events number of io for the async engines.
 908  */
 909 long io_u_queued_complete(struct thread_data *td, int min_events)
 910 {
 911         struct io_completion_data icd;
 912         struct timespec *tvp = NULL;
 913         int ret;
 914         struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
 915
 916         dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_events);
 917
 918         if (!min_events)
 919                 tvp = &ts;
 920
 921         ret = td_io_getevents(td, min_events, td->cur_depth, tvp);
 922         if (ret < 0) {
 923                 td_verror(td, -ret, "td_io_getevents");
 924                 return ret;
 925         } else if (!ret)
 926                 return ret;
 927
 928         init_icd(&icd, ret);
 929         ios_completed(td, &icd);
 930         if (!icd.error)
 931                 return icd.bytes_done[0] + icd.bytes_done[1];
 932
 933         td_verror(td, icd.error, "io_u_queued_complete");
 934         return -1;
 935 }
 936
 937 /*
 938  * Call when io_u is really queued, to update the submission latency.
 939  */
 940 void io_u_queued(struct thread_data *td, struct io_u *io_u)
 941 {
 942         unsigned long slat_time;
 943
 944         slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
 945         add_slat_sample(td, io_u->ddir, slat_time);
 946 }
 947
 948 #ifdef FIO_USE_TIMEOUT
 949 void io_u_set_timeout(struct thread_data *td)
 950 {
 951         assert(td->cur_depth);
 952
 953         td->timer.it_interval.tv_sec = 0;
 954         td->timer.it_interval.tv_usec = 0;
 955         td->timer.it_value.tv_sec = IO_U_TIMEOUT + IO_U_TIMEOUT_INC;
 956         td->timer.it_value.tv_usec = 0;
 957         setitimer(ITIMER_REAL, &td->timer, NULL);
 958         fio_gettime(&td->timeout_end, NULL);
 959 }
 960
 961 static void io_u_dump(struct io_u *io_u)
 962 {
 963         unsigned long t_start = mtime_since_now(&io_u->start_time);
 964         unsigned long t_issue = mtime_since_now(&io_u->issue_time);
 965
 966         log_err("io_u=%p, t_start=%lu, t_issue=%lu\n", io_u, t_start, t_issue);
 967         log_err("  buf=%p/%p, len=%lu/%lu, offset=%llu\n", io_u->buf,
 968                                                 io_u->xfer_buf, io_u->buflen,
 969                                                 io_u->xfer_buflen,
 970                                                 io_u->offset);
 971         log_err("  ddir=%d, fname=%s\n", io_u->ddir, io_u->file->file_name);
 972 }
 973 #else
 974 void io_u_set_timeout(struct thread_data fio_unused *td)
 975 {
 976 }
 977 #endif
 978
 979 #ifdef FIO_USE_TIMEOUT
 980 static void io_u_timeout_handler(int fio_unused sig)
 981 {
 982         struct thread_data *td, *__td;
 983         pid_t pid = getpid();
 984         struct list_head *entry;
 985         struct io_u *io_u;
 986         int i;
 987
 988         log_err("fio: io_u timeout\n");
 989
 990         /*
 991          * TLS would be nice...
 992          */
 993         td = NULL;
 994         for_each_td(__td, i) {
 995                 if (__td->pid == pid) {
 996                         td = __td;
 997                         break;
 998                 }
 999         }
1000
1001         if (!td) {
1002                 log_err("fio: io_u timeout, can't find job\n");
1003                 exit(1);
1004         }
1005
1006         if (!td->cur_depth) {
1007                 log_err("fio: timeout without pending work?\n");
1008                 return;
1009         }
1010
1011         log_err("fio: io_u timeout: job=%s, pid=%d\n", td->o.name, td->pid);
1012
1013         list_for_each(entry, &td->io_u_busylist) {
1014                 io_u = list_entry(entry, struct io_u, list);
1015
1016                 io_u_dump(io_u);
1017         }
1018
1019         td_verror(td, ETIMEDOUT, "io_u timeout");
1020         exit(1);
1021 }
1022 #endif
1023
/*
 * Install the SIGALRM handler used for io_u timeouts. Does nothing
 * unless FIO_USE_TIMEOUT is defined.
 */
void io_u_init_timeout(void)
{
#ifdef FIO_USE_TIMEOUT
	signal(SIGALRM, io_u_timeout_handler);
#endif
}