[PATCH] fio: print whether we will use mmap or not
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
e128065d 29#include <math.h>
49d2caab 30#include <assert.h>
189873de 31#include <pthread.h>
892199bd
JA
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/wait.h>
892199bd
JA
35#include <sys/ipc.h>
36#include <sys/shm.h>
c94deb1c 37#include <sys/ioctl.h>
6e2c38cc 38#include <sys/mman.h>
892199bd
JA
39#include <asm/unistd.h>
40
27c32a38 41#include "fio.h"
892199bd 42
892199bd
JA
/* Low-bit mask for the 4096-byte boundary that ALIGN() rounds up to. */
#define MASK	(4095)

/*
 * Round buf up to the next (MASK + 1)-byte boundary and yield it as a
 * char pointer. The whole expansion is parenthesized so that postfix
 * operators applied to the result (indexing, ->) bind to the aligned
 * pointer instead of to the operand of the cast.
 */
#define ALIGN(buf)	((char *) (((unsigned long) (buf) + MASK) & ~(MASK)))
892199bd 46
27c32a38
JA
47int groupid = 0;
48int thread_number = 0;
49char run_str[MAX_JOBS + 1];
50int shm_id = 0;
892199bd 51
02bdd9ba
JA
/*
 * Run states of a job thread, declared in life-cycle order.
 * TD_VERIFYING is entered by the verify passes and left again for
 * TD_RUNNING once they finish.
 */
enum {
	TD_NOT_CREATED = 0,
	TD_CREATED,
	TD_RUNNING,
	TD_VERIFYING,
	TD_EXITED,
	TD_REAPED,
};
63
2c83567e
JA
64/*
65 * The io unit
66 */
67struct io_u {
68 struct iocb iocb;
57d753e3 69 struct timeval start_time;
2c83567e
JA
70 struct timeval issue_time;
71
2c83567e
JA
72 char *buf;
73 unsigned int buflen;
4ac89145 74 unsigned long long offset;
2c83567e
JA
75
76 struct list_head list;
77};
78
/* True for a writing job that is not using O_DIRECT. */
#define should_fsync(td)	(td_write(td) && !(td)->odirect)
02983297 80
892199bd
JA
static sem_t startup_sem;

/* Group id wildcard for terminate_threads(): matches every thread. */
#define TERMINATE_ALL	(-1)
84
85static void terminate_threads(int groupid)
892199bd
JA
86{
87 int i;
88
213b446c
JA
89 for (i = 0; i < thread_number; i++) {
90 struct thread_data *td = &threads[i];
91
27c32a38
JA
92 if (groupid == TERMINATE_ALL || groupid == td->groupid) {
93 td->terminate = 1;
94 td->start_delay = 0;
95 }
213b446c 96 }
02bdd9ba
JA
97}
98
27c32a38 99static void sig_handler(int sig)
946d8870 100{
27c32a38 101 terminate_threads(TERMINATE_ALL);
892199bd
JA
102}
103
/*
 * Microseconds elapsed from *s to *e.
 *
 * Returns 0 when *e is not later than *s, so a clock that stepped
 * backwards cannot yield a negative interval (which would otherwise be
 * converted to a huge unsigned value).
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - s->tv_sec;
	long long usec = (long long) e->tv_usec - s->tv_usec;

	/* borrow one second when the usec part went negative */
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	return sec * 1000000 + usec;
}
119
fd11d7af
JA
/*
 * Microseconds elapsed between *s and the current time of day.
 */
static unsigned long utime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return utime_since(s, &now);
}
127
/*
 * Milliseconds elapsed from *s to *e, truncated towards zero.
 *
 * Returns 0 when *e is not later than *s, so a clock that stepped
 * backwards cannot yield a negative interval (which would otherwise be
 * converted to a huge unsigned value).
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - s->tv_sec;
	long long usec = (long long) e->tv_usec - s->tv_usec;

	/* borrow one second when the usec part went negative */
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	return sec * 1000 + usec / 1000;
}
144
be33abe4
JA
/*
 * Milliseconds elapsed between *s and the current time of day.
 */
static unsigned long mtime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return mtime_since(s, &now);
}
152
98168d55
JA
/*
 * Convert the timestamp *s to a millisecond count.
 */
static inline unsigned long msec_now(struct timeval *s)
{
	unsigned long from_sec = s->tv_sec * 1000;
	unsigned long from_usec = s->tv_usec / 1000;

	return from_sec + from_usec;
}
157
49d2caab
JA
158static int random_map_free(struct thread_data *td, unsigned long long block)
159{
75b2ab2c
JA
160 unsigned int idx = RAND_MAP_IDX(td, block);
161 unsigned int bit = RAND_MAP_BIT(td, block);
49d2caab
JA
162
163 return (td->file_map[idx] & (1UL << bit)) == 0;
164}
165
166static int get_next_free_block(struct thread_data *td, unsigned long long *b)
892199bd 167{
49d2caab
JA
168 int i;
169
170 *b = 0;
171 i = 0;
172 while ((*b) * td->min_bs < td->io_size) {
173 if (td->file_map[i] != -1UL) {
174 *b += ffz(td->file_map[i]);
175 return 0;
176 }
177
178 *b += BLOCKS_PER_MAP;
179 i++;
180 }
181
182 return 1;
183}
184
185static void mark_random_map(struct thread_data *td, struct io_u *io_u)
186{
187 unsigned long block = io_u->offset / td->min_bs;
188 unsigned int blocks = 0;
189
190 while (blocks < (io_u->buflen / td->min_bs)) {
191 int idx, bit;
192
193 if (!random_map_free(td, block))
194 break;
195
75b2ab2c
JA
196 idx = RAND_MAP_IDX(td, block);
197 bit = RAND_MAP_BIT(td, block);
49d2caab
JA
198
199 assert(idx < td->num_maps);
200
201 td->file_map[idx] |= (1UL << bit);
202 block++;
203 blocks++;
204 }
205
206 if ((blocks * td->min_bs) < io_u->buflen)
207 io_u->buflen = blocks * td->min_bs;
208}
209
210static int get_next_offset(struct thread_data *td, unsigned long long *offset)
211{
75b2ab2c 212 unsigned long long b, rb;
d32d9284 213 long r;
892199bd
JA
214
215 if (!td->sequential) {
49d2caab
JA
216 unsigned long max_blocks = td->io_size / td->min_bs;
217 int loops = 50;
218
219 do {
220 lrand48_r(&td->random_state, &r);
221 b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
75b2ab2c 222 rb = b + (td->file_offset / td->min_bs);
49d2caab 223 loops--;
75b2ab2c 224 } while (!random_map_free(td, rb) && loops);
49d2caab
JA
225
226 if (!loops) {
227 if (get_next_free_block(td, &b))
228 return 1;
229 }
7889f07b 230 } else
49d2caab 231 b = td->last_bytes / td->min_bs;
7889f07b 232
49d2caab 233 *offset = (b * td->min_bs) + td->file_offset;
75b2ab2c
JA
234 if (*offset > td->file_size)
235 return 1;
236
49d2caab 237 return 0;
7889f07b
JA
238}
239
240static unsigned int get_next_buflen(struct thread_data *td)
241{
242 unsigned int buflen;
d32d9284 243 long r;
7889f07b
JA
244
245 if (td->min_bs == td->max_bs)
246 buflen = td->min_bs;
247 else {
d32d9284 248 lrand48_r(&td->bsrange_state, &r);
7889f07b
JA
249 buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
250 buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
892199bd
JA
251 }
252
49d2caab
JA
253 if (buflen > td->io_size - td->this_io_bytes)
254 buflen = td->io_size - td->this_io_bytes;
7889f07b 255
7889f07b 256 return buflen;
892199bd
JA
257}
258
57d753e3
JA
259static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
260 unsigned long val)
892199bd 261{
57d753e3
JA
262 if (val > is->max_val)
263 is->max_val = val;
264 if (val < is->min_val)
265 is->min_val = val;
266
267 is->val += val;
268 is->val_sq += val * val;
269 is->samples++;
270}
fd1ae4c9 271
a0a9b35b
JA
272static void add_log_sample(struct thread_data *td, struct io_log *log,
273 unsigned long val)
274{
275 if (log->nr_samples == log->max_samples) {
276 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
277
278 log->log = realloc(log->log, new_size);
279 log->max_samples <<= 1;
280 }
281
282 log->log[log->nr_samples].val = val;
283 log->log[log->nr_samples].time = mtime_since_now(&td->start);
284 log->nr_samples++;
285}
286
57d753e3
JA
287static void add_clat_sample(struct thread_data *td, unsigned long msec)
288{
289 add_stat_sample(td, &td->clat_stat, msec);
a0a9b35b
JA
290
291 if (td->lat_log)
292 add_log_sample(td, td->lat_log, msec);
57d753e3 293}
fd1ae4c9 294
57d753e3
JA
295static void add_slat_sample(struct thread_data *td, unsigned long msec)
296{
297 add_stat_sample(td, &td->slat_stat, msec);
298}
fd1ae4c9 299
645785e5 300static void add_bw_sample(struct thread_data *td)
57d753e3
JA
301{
302 unsigned long spent = mtime_since_now(&td->stat_sample_time);
303 unsigned long rate;
304
1d035750 305 if (spent < td->bw_avg_time)
57d753e3
JA
306 return;
307
49d2caab 308 rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
57d753e3
JA
309 add_stat_sample(td, &td->bw_stat, rate);
310
a0a9b35b
JA
311 if (td->bw_log)
312 add_log_sample(td, td->bw_log, rate);
313
57d753e3 314 gettimeofday(&td->stat_sample_time, NULL);
49d2caab 315 td->stat_io_bytes = td->this_io_bytes;
892199bd
JA
316}
317
fd11d7af
JA
318/*
319 * busy looping version for the last few usec
320 */
321static void __usec_sleep(int usec)
322{
323 struct timeval start;
324
325 gettimeofday(&start, NULL);
326 while (utime_since_now(&start) < usec)
3782a8cd 327 nop;
fd11d7af
JA
328}
329
/*
 * Sleep for usec microseconds. Anything under 5000 usec is spent
 * busy-looping; longer sleeps use nanosleep() and are resumed when a
 * signal interrupts them.
 *
 * The request is split into seconds and nanoseconds so tv_nsec stays
 * below one second for long sleeps, and the time left after an
 * interruption is converted from nanoseconds back to microseconds.
 * A zero or negative usec returns immediately.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	while (usec > 0) {
		if (usec < 5000) {
			__usec_sleep(usec);
			break;
		}

		req.tv_sec = usec / 1000000;
		req.tv_nsec = (long) (usec % 1000000) * 1000;
		rem.tv_sec = rem.tv_nsec = 0;

		/* done, or failed for a reason other than a signal */
		if (!nanosleep(&req, &rem) || errno != EINTR)
			break;

		usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
	}
}
349
9e850933
JA
350static void rate_throttle(struct thread_data *td, unsigned long time_spent,
351 unsigned int bytes)
86184d14 352{
9e850933
JA
353 unsigned long usec_cycle;
354
4240cfa1
JA
355 if (!td->rate)
356 return;
357
9e850933
JA
358 usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
359
360 if (time_spent < usec_cycle) {
361 unsigned long s = usec_cycle - time_spent;
86184d14
JA
362
363 td->rate_pending_usleep += s;
fad86e6a 364 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
365 usec_sleep(td->rate_pending_usleep);
366 td->rate_pending_usleep = 0;
367 }
4240cfa1 368 } else {
9e850933 369 long overtime = time_spent - usec_cycle;
42b2b9fe 370
4240cfa1
JA
371 td->rate_pending_usleep -= overtime;
372 }
373}
374
5c24b2c4 375static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 376{
7607bc6b 377 unsigned long spent;
4240cfa1
JA
378 unsigned long rate;
379
380 /*
381 * allow a 2 second settle period in the beginning
382 */
7607bc6b 383 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
384 return 0;
385
386 /*
387 * if rate blocks is set, sample is running
388 */
49d2caab 389 if (td->rate_bytes) {
4240cfa1
JA
390 spent = mtime_since(&td->lastrate, now);
391 if (spent < td->ratecycle)
392 return 0;
393
49d2caab 394 rate = (td->this_io_bytes - td->rate_bytes) / spent;
4240cfa1
JA
395 if (rate < td->ratemin) {
396 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 397 if (rate_quit)
27c32a38 398 terminate_threads(td->groupid);
4240cfa1
JA
399 return 1;
400 }
86184d14 401 }
4240cfa1 402
49d2caab 403 td->rate_bytes = td->this_io_bytes;
4240cfa1
JA
404 memcpy(&td->lastrate, now, sizeof(*now));
405 return 0;
86184d14
JA
406}
407
67903a2e
JA
408static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
409{
01f79976
JA
410 if (!td->timeout)
411 return 0;
67903a2e
JA
412 if (mtime_since(&td->start, t) >= td->timeout * 1000)
413 return 1;
414
415 return 0;
416}
417
e8457004
JA
418static void fill_random_bytes(struct thread_data *td,
419 unsigned char *p, unsigned int len)
420{
645785e5 421 unsigned int todo;
40ef7f64 422 double r;
e8457004
JA
423
424 while (len) {
40ef7f64 425 drand48_r(&td->verify_state, &r);
e8457004 426
40ef7f64
JA
427 /*
428 * lrand48_r seems to be broken and only fill the bottom
429 * 32-bits, even on 64-bit archs with 64-bit longs
430 */
431 todo = sizeof(r);
e8457004
JA
432 if (todo > len)
433 todo = len;
434
435 memcpy(p, &r, todo);
436
437 len -= todo;
438 p += todo;
439 }
440}
441
9d0c6ca2
JA
/*
 * Print 'len' bytes of 'buffer' to stdout as two-digit lowercase hex,
 * followed by a newline.
 */
static void hexdump(void *buffer, int len)
{
	unsigned char *byte = buffer;
	int left;

	for (left = len; left > 0; left--, byte++)
		printf("%02x", *byte);

	printf("\n");
}
451
645785e5 452static int verify_io_u(struct io_u *io_u)
e8457004
JA
453{
454 struct verify_header *hdr = (struct verify_header *) io_u->buf;
455 unsigned char *p = (unsigned char *) io_u->buf;
456 struct md5_ctx md5_ctx;
9d0c6ca2 457 int ret;
e8457004 458
840b216f 459 if (hdr->fio_magic != FIO_HDR_MAGIC)
e8457004
JA
460 return 1;
461
462 memset(&md5_ctx, 0, sizeof(md5_ctx));
463 p += sizeof(*hdr);
464 md5_update(&md5_ctx, p, hdr->len - sizeof(*hdr));
465
9d0c6ca2
JA
466 ret = memcmp(hdr->md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
467 if (ret) {
468 hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
469 hexdump(md5_ctx.hash, sizeof(md5_ctx.hash));
470 }
471
472 return ret;
e8457004
JA
473}
474
cfc702bd
JA
475/*
476 * fill body of io_u->buf with random data and add a header with the
477 * (eg) sha1sum of that data.
478 */
e8457004 479static void populate_io_u(struct thread_data *td, struct io_u *io_u)
cfc702bd 480{
e8457004
JA
481 struct md5_ctx md5_ctx;
482 struct verify_header hdr;
483 unsigned char *p = (unsigned char *) io_u->buf;
484
485 hdr.fio_magic = FIO_HDR_MAGIC;
486 hdr.len = io_u->buflen;
487 p += sizeof(hdr);
488 fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
489
490 memset(&md5_ctx, 0, sizeof(md5_ctx));
491 md5_update(&md5_ctx, p, io_u->buflen - sizeof(hdr));
492 memcpy(hdr.md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
493 memcpy(io_u->buf, &hdr, sizeof(hdr));
cfc702bd
JA
494}
495
2c83567e
JA
496static void put_io_u(struct thread_data *td, struct io_u *io_u)
497{
498 list_del(&io_u->list);
499 list_add(&io_u->list, &td->io_u_freelist);
500 td->cur_depth--;
501}
502
f0f3411b
JA
/* True when no free io unit is left, i.e. nothing more can be queued. */
#define queue_full(td)	(list_empty(&(td)->io_u_freelist))
504
e8457004
JA
505static struct io_u *__get_io_u(struct thread_data *td)
506{
507 struct io_u *io_u;
508
f0f3411b 509 if (queue_full(td))
e8457004
JA
510 return NULL;
511
512 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
513 list_del(&io_u->list);
514 list_add(&io_u->list, &td->io_u_busylist);
f4bb2243 515 td->cur_depth++;
e8457004
JA
516 return io_u;
517}
518
2c83567e
JA
519static struct io_u *get_io_u(struct thread_data *td)
520{
521 struct io_u *io_u;
522
e8457004
JA
523 io_u = __get_io_u(td);
524 if (!io_u)
2c83567e
JA
525 return NULL;
526
406e7b7c
JA
527 if (get_next_offset(td, &io_u->offset)) {
528 put_io_u(td, io_u);
49d2caab 529 return NULL;
406e7b7c 530 }
49d2caab 531
b2a369fb
JA
532 io_u->buflen = get_next_buflen(td);
533 if (!io_u->buflen) {
e8457004 534 put_io_u(td, io_u);
7889f07b 535 return NULL;
e8457004 536 }
2c83567e 537
75b2ab2c
JA
538 if (io_u->buflen + io_u->offset > td->file_size)
539 io_u->buflen = td->file_size - io_u->offset;
49d2caab
JA
540
541 if (!td->sequential)
542 mark_random_map(td, io_u);
543
544 td->last_bytes += io_u->buflen;
545
9d0c6ca2 546 if (td->verify)
e8457004 547 populate_io_u(td, io_u);
cfc702bd 548
2c83567e
JA
549 if (td->use_aio) {
550 if (td_read(td))
551 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
552 else
553 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
554 }
555
57d753e3 556 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
557 return io_u;
558}
559
40ef7f64
JA
560static inline void td_set_runstate(struct thread_data *td, int runstate)
561{
562 td->old_runstate = td->runstate;
563 td->runstate = runstate;
564}
565
645785e5
JA
566static int get_next_verify(struct thread_data *td,
567 unsigned long long *offset, unsigned int *len)
568{
569 struct io_piece *ipo;
570
571 if (list_empty(&td->io_hist_list))
572 return 1;
573
574 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
575 list_del(&ipo->list);
576
577 *offset = ipo->offset;
578 *len = ipo->len;
579 free(ipo);
580 return 0;
581}
582
9d0c6ca2
JA
583static void prune_io_piece_log(struct thread_data *td)
584{
585 struct io_piece *ipo;
586
587 while (!list_empty(&td->io_hist_list)) {
588 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
589
590 list_del(&ipo->list);
591 free(ipo);
592 }
593}
594
9d0c6ca2
JA
595/*
596 * log a succesful write, so we can unwind the log for verify
597 */
598static void log_io_piece(struct thread_data *td, struct io_u *io_u)
599{
49d2caab 600 struct io_piece *ipo = malloc(sizeof(struct io_piece));
9d0c6ca2
JA
601 struct list_head *entry;
602
603 INIT_LIST_HEAD(&ipo->list);
604 ipo->offset = io_u->offset;
605 ipo->len = io_u->buflen;
606
49d2caab
JA
607 /*
608 * for random io where the writes extend the file, it will typically
609 * be laid out with the block scattered as written. it's faster to
610 * read them in in that order again, so don't sort
611 */
612 if (td->sequential || !td->overwrite) {
9d0c6ca2
JA
613 list_add_tail(&ipo->list, &td->io_hist_list);
614 return;
615 }
616
617 /*
618 * for random io, sort the list so verify will run faster
619 */
620 entry = &td->io_hist_list;
621 while ((entry = entry->prev) != &td->io_hist_list) {
622 struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
623
9d0c6ca2
JA
624 if (__ipo->offset < ipo->offset)
625 break;
626 }
627
628 list_add(&ipo->list, entry);
629}
630
91fc5dc9 631static void do_sync_verify(struct thread_data *td)
cfc702bd 632{
40ef7f64 633 struct timeval t;
e8457004 634 struct io_u *io_u = NULL;
645785e5 635 int ret;
e8457004 636
40ef7f64 637 td_set_runstate(td, TD_VERIFYING);
e8457004
JA
638
639 io_u = __get_io_u(td);
640
40ef7f64 641 if (!td->odirect) {
6e2c38cc
JA
642 if (!td->use_mmap) {
643 if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
644 td->error = errno;
645 goto out;
646 }
647 } else {
648 if (madvise(td->mmap, td->io_size, MADV_DONTNEED)) {
649 td->error = errno;
650 goto out;
651 }
40ef7f64
JA
652 }
653 }
654
e8457004
JA
655 do {
656 if (td->terminate)
657 break;
40ef7f64
JA
658
659 gettimeofday(&t, NULL);
660 if (runtime_exceeded(td, &t))
661 break;
662
645785e5
JA
663 if (get_next_verify(td, &io_u->offset, &io_u->buflen))
664 break;
665
666 if (td->cur_off != io_u->offset) {
667 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
668 td->error = errno;
669 break;
670 }
671 }
e8457004
JA
672
673 ret = read(td->fd, io_u->buf, io_u->buflen);
674 if (ret < (int) io_u->buflen) {
675 if (ret == -1) {
676 td->error = errno;
677 break;
678 } else if (!ret)
679 break;
680 else
681 io_u->buflen = ret;
682 }
683
645785e5 684 if (verify_io_u(io_u))
e8457004
JA
685 break;
686
645785e5 687 td->cur_off = io_u->offset + io_u->buflen;
e8457004
JA
688 } while (1);
689
690out:
40ef7f64 691 td_set_runstate(td, TD_RUNNING);
e8457004 692 put_io_u(td, io_u);
cfc702bd
JA
693}
694
6e2c38cc
JA
695static int __do_sync_mmap(struct thread_data *td, struct io_u *io_u)
696{
697 unsigned long long real_off = io_u->offset - td->file_offset;
698
699 if (td_read(td))
700 memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
701 else
702 memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
703
704 return io_u->buflen;
705}
706
707static int __do_sync_rw(struct thread_data *td, struct io_u *io_u)
708{
709 if (td->cur_off != io_u->offset) {
710 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
711 td->error = errno;
712 return 1;
713 }
714 }
715
716 if (td_read(td))
717 return read(td->fd, io_u->buf, io_u->buflen);
718 else
719 return write(td->fd, io_u->buf, io_u->buflen);
720}
721
722static void sync_td(struct thread_data *td)
723{
724 if (!td->use_mmap)
725 fsync(td->fd);
726 else
727 msync(td->mmap, td->file_size, MS_SYNC);
728}
729
43000118 730static void do_sync_io(struct thread_data *td)
892199bd 731{
7889f07b 732 unsigned long msec, usec;
e8457004 733 struct io_u *io_u = NULL;
2c83567e 734 struct timeval e;
892199bd 735
49d2caab 736 while (td->this_io_bytes < td->io_size) {
892199bd
JA
737 int ret;
738
739 if (td->terminate)
740 break;
741
2c83567e 742 io_u = get_io_u(td);
7889f07b
JA
743 if (!io_u)
744 break;
2c83567e 745
6e2c38cc
JA
746 if (!td->use_mmap)
747 ret = __do_sync_rw(td, io_u);
892199bd 748 else
6e2c38cc 749 ret = __do_sync_mmap(td, io_u);
892199bd 750
2c83567e 751 if (ret < (int) io_u->buflen) {
892199bd
JA
752 if (ret == -1)
753 td->error = errno;
754 break;
755 }
756
62bb4285 757 if (td_write(td))
645785e5
JA
758 log_io_piece(td, io_u);
759
4240cfa1 760 td->io_blocks++;
49d2caab
JA
761 td->io_bytes += io_u->buflen;
762 td->this_io_bytes += io_u->buflen;
63a09e51 763 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1 764
86184d14
JA
765 gettimeofday(&e, NULL);
766
57d753e3 767 usec = utime_since(&io_u->start_time, &e);
86184d14 768
9e850933 769 rate_throttle(td, usec, io_u->buflen);
892199bd 770
4240cfa1
JA
771 if (check_min_rate(td, &e)) {
772 td->error = ENODATA;
773 break;
774 }
892199bd 775
4240cfa1 776 msec = usec / 1000;
57d753e3 777 add_clat_sample(td, msec);
645785e5 778 add_bw_sample(td);
67903a2e
JA
779
780 if (runtime_exceeded(td, &e))
781 break;
2c83567e 782
cdf92433 783 put_io_u(td, io_u);
e8457004 784 io_u = NULL;
cdf92433 785
e97712ed
JA
786 if (td->thinktime)
787 usec_sleep(td->thinktime);
788
cdf92433
JA
789 if (should_fsync(td) && td->fsync_blocks &&
790 (td->io_blocks % td->fsync_blocks) == 0)
6e2c38cc 791 sync_td(td);
892199bd
JA
792 }
793
e8457004
JA
794 if (io_u)
795 put_io_u(td, io_u);
796
4240cfa1 797 if (should_fsync(td))
6e2c38cc 798 sync_td(td);
892199bd 799}
43000118 800
1ad72b11
JA
801static int io_u_getevents(struct thread_data *td, int min, int max,
802 struct timespec *t)
803{
804 int r;
805
806 do {
807 r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
808 if (r != -EAGAIN && r != -EINTR)
809 break;
810 } while (1);
811
812 return r;
813}
814
2c83567e 815static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 816{
2c83567e 817 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
818 int ret;
819
820 do {
254605cd 821 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
822 if (ret == 1)
823 return 0;
406e7b7c 824 else if (ret == -EAGAIN)
56b0eff0 825 usleep(100);
406e7b7c 826 else if (ret == -EINTR)
a592bd33 827 continue;
56b0eff0
JA
828 else
829 break;
830 } while (1);
831
a592bd33 832 return ret;
56b0eff0
JA
833}
834
#define iocb_time(iocb)	((unsigned long) (iocb)->data)

/*
 * Map a completion event back to its io unit via the event's obj
 * field. The expansion is fully parenthesized so the result can be
 * dereferenced directly, as in ev_to_iou(ev)->buflen.
 */
#define ev_to_iou(ev)	((struct io_u *) ((unsigned long) (ev)->obj))
837
f0f3411b 838static int ios_completed(struct thread_data *td, int nr)
2c83567e
JA
839{
840 unsigned long msec;
841 struct io_u *io_u;
842 struct timeval e;
9e850933 843 int i, bytes_done;
2c83567e 844
f0f3411b 845 gettimeofday(&e, NULL);
2c83567e 846
9e850933 847 for (i = 0, bytes_done = 0; i < nr; i++) {
2c83567e
JA
848 io_u = ev_to_iou(td->aio_events + i);
849
f0f3411b 850 td->io_blocks++;
49d2caab
JA
851 td->io_bytes += io_u->buflen;
852 td->this_io_bytes += io_u->buflen;
8c033f93 853
f0f3411b 854 msec = mtime_since(&io_u->issue_time, &e);
2c83567e 855
f0f3411b 856 add_clat_sample(td, msec);
645785e5
JA
857 add_bw_sample(td);
858
62bb4285 859 if (td_write(td))
645785e5 860 log_io_piece(td, io_u);
2c83567e 861
f4bb2243 862 bytes_done += io_u->buflen;
2c83567e
JA
863 put_io_u(td, io_u);
864 }
9e850933
JA
865
866 return bytes_done;
2c83567e
JA
867}
868
869static void cleanup_pending_aio(struct thread_data *td)
870{
871 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
872 struct list_head *entry, *n;
873 struct io_u *io_u;
874 int r;
875
876 /*
877 * get immediately available events, if any
878 */
1ad72b11 879 r = io_u_getevents(td, 0, td->cur_depth, &ts);
2c83567e 880 if (r > 0)
f0f3411b 881 ios_completed(td, r);
2c83567e
JA
882
883 /*
884 * now cancel remaining active events
885 */
886 list_for_each_safe(entry, n, &td->io_u_busylist) {
887 io_u = list_entry(entry, struct io_u, list);
888
889 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
890 if (!r)
891 put_io_u(td, io_u);
892 }
893
894 if (td->cur_depth) {
1ad72b11 895 r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
2c83567e 896 if (r > 0)
f0f3411b 897 ios_completed(td, r);
2c83567e
JA
898 }
899}
98168d55 900
d32d9284
JA
/*
 * Verify the unit *io_u points at, if there is one, then release it and
 * reset *io_u to NULL. Returns the verify result, or 0 when there was
 * nothing to verify.
 */
static int async_do_verify(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *pending = *io_u;
	int ret;

	if (!pending)
		return 0;

	ret = verify_io_u(pending);
	put_io_u(td, pending);
	*io_u = NULL;

	return ret;
}
914
91fc5dc9 915static void do_async_verify(struct thread_data *td)
cfc702bd 916{
f4bb2243 917 struct timeval t;
d32d9284 918 struct io_u *io_u, *v_io_u = NULL;
645785e5 919 int ret;
f4bb2243
JA
920
921 td_set_runstate(td, TD_VERIFYING);
922
f4bb2243
JA
923 do {
924 if (td->terminate)
925 break;
926
927 gettimeofday(&t, NULL);
928 if (runtime_exceeded(td, &t))
929 break;
930
931 io_u = __get_io_u(td);
932 if (!io_u)
933 break;
934
645785e5
JA
935 if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
936 put_io_u(td, io_u);
937 break;
f4bb2243
JA
938 }
939
940 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
941 ret = io_u_queue(td, io_u);
942 if (ret) {
943 put_io_u(td, io_u);
944 td->error = ret;
945 break;
946 }
947
f0f3411b
JA
948 /*
949 * we have one pending to verify, do that while the next
950 * we are doing io on the next one
951 */
d32d9284
JA
952 if (async_do_verify(td, &v_io_u))
953 break;
f0f3411b 954
1ad72b11 955 ret = io_u_getevents(td, 1, 1, NULL);
f0f3411b
JA
956 if (ret != 1) {
957 if (ret < 0)
958 td->error = ret;
f4bb2243
JA
959 break;
960 }
961
f0f3411b 962 v_io_u = ev_to_iou(td->aio_events);
f4bb2243 963
645785e5 964 td->cur_off = v_io_u->offset + v_io_u->buflen;
f0f3411b
JA
965
966 /*
d32d9284 967 * if we can't submit more io, we need to verify now
f0f3411b 968 */
d32d9284
JA
969 if (queue_full(td) && async_do_verify(td, &v_io_u))
970 break;
971
f4bb2243
JA
972 } while (1);
973
d32d9284 974 async_do_verify(td, &v_io_u);
f0f3411b 975
f4bb2243
JA
976 if (td->cur_depth)
977 cleanup_pending_aio(td);
978
979 td_set_runstate(td, TD_RUNNING);
cfc702bd
JA
980}
981
43000118
JA
982static void do_async_io(struct thread_data *td)
983{
984 struct timeval s, e;
7889f07b 985 unsigned long usec;
43000118 986
49d2caab 987 while (td->this_io_bytes < td->io_size) {
43000118
JA
988 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
989 struct timespec *timeout;
2c83567e
JA
990 int ret, min_evts = 0;
991 struct io_u *io_u;
9e850933 992 unsigned int bytes_done;
43000118
JA
993
994 if (td->terminate)
995 break;
996
2c83567e 997 io_u = get_io_u(td);
7889f07b
JA
998 if (!io_u)
999 break;
43000118 1000
57d753e3 1001 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 1002
2c83567e 1003 ret = io_u_queue(td, io_u);
56b0eff0 1004 if (ret) {
a3fdb993 1005 put_io_u(td, io_u);
a592bd33 1006 td->error = ret;
43000118
JA
1007 break;
1008 }
1009
57d753e3
JA
1010 gettimeofday(&io_u->issue_time, NULL);
1011 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
2c83567e 1012 if (td->cur_depth < td->aio_depth) {
43000118
JA
1013 timeout = &ts;
1014 min_evts = 0;
1015 } else {
1016 timeout = NULL;
1017 min_evts = 1;
1018 }
1019
1ad72b11 1020 ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
43000118 1021 if (ret < 0) {
406e7b7c 1022 td->error = ret;
43000118
JA
1023 break;
1024 } else if (!ret)
1025 continue;
1026
f0f3411b 1027 bytes_done = ios_completed(td, ret);
43000118 1028
98168d55
JA
1029 /*
1030 * the rate is batched for now, it should work for batches
1031 * of completions except the very first one which may look
1032 * a little bursty
1033 */
2c83567e 1034 gettimeofday(&e, NULL);
43000118
JA
1035 usec = utime_since(&s, &e);
1036
9e850933 1037 rate_throttle(td, usec, bytes_done);
43000118
JA
1038
1039 if (check_min_rate(td, &e)) {
1040 td->error = ENODATA;
1041 break;
1042 }
67903a2e
JA
1043
1044 if (runtime_exceeded(td, &e))
1045 break;
765d9223
JA
1046
1047 if (td->thinktime)
1048 usec_sleep(td->thinktime);
cdf92433
JA
1049
1050 if (should_fsync(td) && td->fsync_blocks &&
1051 (td->io_blocks % td->fsync_blocks) == 0)
1052 fsync(td->fd);
43000118 1053 }
56b0eff0 1054
2c83567e
JA
1055 if (td->cur_depth)
1056 cleanup_pending_aio(td);
4ac89145
JA
1057
1058 if (should_fsync(td))
1059 fsync(td->fd);
56b0eff0
JA
1060}
1061
1062static void cleanup_aio(struct thread_data *td)
1063{
254605cd
JA
1064 io_destroy(td->aio_ctx);
1065
43000118
JA
1066 if (td->aio_events)
1067 free(td->aio_events);
43000118
JA
1068}
1069
1070static int init_aio(struct thread_data *td)
1071{
254605cd 1072 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
1073 td->error = errno;
1074 return 1;
1075 }
1076
43000118 1077 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
1078 return 0;
1079}
1080
2c83567e
JA
1081static void cleanup_io_u(struct thread_data *td)
1082{
1083 struct list_head *entry, *n;
1084 struct io_u *io_u;
1085
1086 list_for_each_safe(entry, n, &td->io_u_freelist) {
1087 io_u = list_entry(entry, struct io_u, list);
1088
1089 list_del(&io_u->list);
2c83567e
JA
1090 free(io_u);
1091 }
6b71c826 1092
99c6704f
JA
1093 if (td->mem_type == MEM_MALLOC)
1094 free(td->orig_buffer);
1095 else if (td->mem_type == MEM_SHM) {
1096 struct shmid_ds sbuf;
1097
1098 shmdt(td->orig_buffer);
1099 shmctl(td->shm_id, IPC_RMID, &sbuf);
1100 }
2c83567e
JA
1101}
1102
99c6704f 1103static int init_io_u(struct thread_data *td)
2c83567e
JA
1104{
1105 struct io_u *io_u;
99c6704f 1106 int i, max_units, mem_size;
6b71c826 1107 char *p;
2c83567e
JA
1108
1109 if (!td->use_aio)
1110 max_units = 1;
1111 else
1112 max_units = td->aio_depth;
1113
7889f07b 1114 mem_size = td->max_bs * max_units + MASK;
99c6704f
JA
1115
1116 if (td->mem_type == MEM_MALLOC)
1117 td->orig_buffer = malloc(mem_size);
1118 else if (td->mem_type == MEM_SHM) {
1119 td->shm_id = shmget(IPC_PRIVATE, mem_size, IPC_CREAT | 0600);
1120 if (td->shm_id < 0) {
1121 td->error = errno;
1122 perror("shmget");
1123 return 1;
1124 }
1125
1126 td->orig_buffer = shmat(td->shm_id, NULL, 0);
1127 if (td->orig_buffer == (void *) -1) {
1128 td->error = errno;
1129 perror("shmat");
1130 return 1;
1131 }
1132 }
6b71c826 1133
2c83567e
JA
1134 INIT_LIST_HEAD(&td->io_u_freelist);
1135 INIT_LIST_HEAD(&td->io_u_busylist);
645785e5 1136 INIT_LIST_HEAD(&td->io_hist_list);
2c83567e 1137
99c6704f 1138 p = ALIGN(td->orig_buffer);
2c83567e
JA
1139 for (i = 0; i < max_units; i++) {
1140 io_u = malloc(sizeof(*io_u));
1141 memset(io_u, 0, sizeof(*io_u));
1142 INIT_LIST_HEAD(&io_u->list);
1143
7889f07b 1144 io_u->buf = p + td->max_bs * i;
2c83567e
JA
1145 list_add(&io_u->list, &td->io_u_freelist);
1146 }
99c6704f
JA
1147
1148 return 0;
2c83567e
JA
1149}
1150
02983297
JA
1151static int create_file(struct thread_data *td)
1152{
7889f07b 1153 unsigned long long left;
645785e5 1154 unsigned int bs;
02983297 1155 char *b;
645785e5 1156 int r;
02983297 1157
02983297
JA
1158 /*
1159 * unless specifically asked for overwrite, let normal io extend it
1160 */
62bb4285 1161 if (td_write(td) && !td->overwrite)
02983297
JA
1162 return 0;
1163
57d753e3
JA
1164 if (!td->file_size) {
1165 fprintf(stderr, "Need size for create\n");
1166 td->error = EINVAL;
1167 return 1;
1168 }
1169
42fd89a7
JA
1170 printf("Client%d: Laying out IO file\n", td->thread_number);
1171
02983297
JA
1172 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1173 if (td->fd < 0) {
1174 td->error = errno;
1175 return 1;
1176 }
1177
c94deb1c
JA
1178 if (ftruncate(td->fd, td->file_size) == -1) {
1179 td->error = errno;
1180 return 1;
1181 }
1182
49d2caab 1183 td->io_size = td->file_size;
7889f07b
JA
1184 b = malloc(td->max_bs);
1185 memset(b, 0, td->max_bs);
1186
1187 left = td->file_size;
1188 while (left) {
1189 bs = td->max_bs;
1190 if (bs > left)
1191 bs = left;
02983297 1192
7889f07b 1193 r = write(td->fd, b, bs);
02983297 1194
645785e5 1195 if (r == (int) bs) {
7889f07b 1196 left -= bs;
02983297 1197 continue;
7889f07b 1198 } else {
02983297
JA
1199 if (r < 0)
1200 td->error = errno;
1201 else
1202 td->error = EIO;
1203
1204 break;
1205 }
1206 }
1207
fc097bfe
JA
1208 if (td->create_fsync)
1209 fsync(td->fd);
1210
02983297
JA
1211 close(td->fd);
1212 td->fd = -1;
1213 free(b);
1214 return 0;
1215}
1216
1217static int file_exists(struct thread_data *td)
1218{
1219 struct stat st;
1220
1221 if (stat(td->file_name, &st) != -1)
1222 return 1;
1223
1224 return errno != ENOENT;
1225}
1226
c4c8f7b3 1227static int file_size(struct thread_data *td)
02983297
JA
1228{
1229 struct stat st;
c94deb1c
JA
1230
1231 if (fstat(td->fd, &st) == -1) {
1232 td->error = errno;
1233 return 1;
1234 }
1235
c94deb1c 1236 if (td_read(td)) {
c4c8f7b3
JA
1237 if (!td->file_size || td->file_size > st.st_size)
1238 td->file_size = st.st_size;
c94deb1c
JA
1239 } else {
1240 if (!td->file_size)
1241 td->file_size = 1024 * 1024 * 1024;
c4c8f7b3
JA
1242 }
1243
1244 return 0;
1245}
1246
1247static int bdev_size(struct thread_data *td)
1248{
1249 size_t bytes;
c94deb1c 1250
c4c8f7b3
JA
1251 if (ioctl(td->fd, BLKGETSIZE64, &bytes) < 0) {
1252 td->error = errno;
1253 return 1;
c94deb1c
JA
1254 }
1255
c4c8f7b3
JA
1256 if (!td->file_size || (td->file_size > bytes))
1257 td->file_size = bytes;
1258
1259 return 0;
1260}
1261
1262static int get_file_size(struct thread_data *td)
1263{
1264 int ret;
1265
1266 if (td->filetype == FIO_TYPE_FILE)
1267 ret = file_size(td);
1268 else
1269 ret = bdev_size(td);
1270
1271 if (ret)
1272 return ret;
1273
1274 if (td->file_offset > td->file_size) {
c94deb1c
JA
1275 fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
1276 return 1;
1277 }
1278
c4c8f7b3 1279 td->io_size = td->file_size - td->file_offset;
c94deb1c
JA
1280 if (td->io_size == 0) {
1281 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
1282 td->error = EINVAL;
1283 return 1;
1284 }
1285
1286 return 0;
1287}
1288
6e2c38cc
JA
1289static int setup_file_mmap(struct thread_data *td)
1290{
1291 int flags;
1292
1293 if (td_read(td))
1294 flags = PROT_READ;
1295 else {
1296 flags = PROT_WRITE;
1297
1298 if (td->verify)
1299 flags |= PROT_READ;
1300 }
1301
1302 td->mmap = mmap(NULL, td->file_size, flags, MAP_SHARED, td->fd, td->file_offset);
1303 if (td->mmap == MAP_FAILED) {
1304 td->mmap = NULL;
1305 td->error = errno;
1306 return 1;
1307 }
1308
1309 if (td->invalidate_cache) {
1310 if (madvise(td->mmap, td->file_size, MADV_DONTNEED) < 0) {
1311 td->error = errno;
1312 return 1;
1313 }
1314 }
1315
1316 if (td->sequential) {
1317 if (madvise(td->mmap, td->file_size, MADV_SEQUENTIAL) < 0) {
1318 td->error = errno;
1319 return 1;
1320 }
1321 } else {
1322 if (madvise(td->mmap, td->file_size, MADV_RANDOM) < 0) {
1323 td->error = errno;
1324 return 1;
1325 }
1326 }
1327
1328 return 0;
1329}
1330
1331static int setup_file_plain(struct thread_data *td)
1332{
1333 if (td->invalidate_cache) {
1334 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
1335 td->error = errno;
1336 return 1;
1337 }
1338 }
1339
1340 if (td->sequential) {
1341 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_SEQUENTIAL) < 0) {
1342 td->error = errno;
1343 return 1;
1344 }
1345 } else {
1346 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_RANDOM) < 0) {
1347 td->error = errno;
1348 return 1;
1349 }
1350 }
1351
1352 return 0;
1353}
1354
c94deb1c
JA
1355static int setup_file(struct thread_data *td)
1356{
02983297
JA
1357 int flags = 0;
1358
1359 if (!file_exists(td)) {
1360 if (!td->create_file) {
1361 td->error = ENOENT;
1362 return 1;
1363 }
1364 if (create_file(td))
1365 return 1;
1366 }
1367
1368 if (td->odirect)
1369 flags |= O_DIRECT;
1370
1371 if (td_read(td))
1372 td->fd = open(td->file_name, flags | O_RDONLY);
1373 else {
1374 if (!td->overwrite)
1375 flags |= O_TRUNC;
74b4b5fb
JA
1376 if (td->sync_io)
1377 flags |= O_SYNC;
6e2c38cc
JA
1378
1379 flags |= O_RDWR;
02983297 1380
e8457004 1381 td->fd = open(td->file_name, flags | O_CREAT, 0600);
02983297
JA
1382 }
1383
1384 if (td->fd == -1) {
1385 td->error = errno;
1386 return 1;
1387 }
1388
c94deb1c 1389 if (get_file_size(td))
49d2caab 1390 return 1;
49d2caab 1391
62bb4285 1392 if (td_write(td) && ftruncate(td->fd, td->file_size) == -1) {
c94deb1c 1393 td->error = errno;
02983297
JA
1394 return 1;
1395 }
1396
6e2c38cc
JA
1397 if (!td->use_mmap)
1398 return setup_file_plain(td);
1399 else
1400 return setup_file_mmap(td);
02983297
JA
1401}
1402
d32d9284
JA
1403static void clear_io_state(struct thread_data *td)
1404{
9d0c6ca2
JA
1405 if (!td->use_aio)
1406 lseek(td->fd, SEEK_SET, 0);
1407
d32d9284 1408 td->cur_off = 0;
49d2caab
JA
1409 td->last_bytes = 0;
1410 td->stat_io_bytes = 0;
1411 td->this_io_bytes = 0;
1412
1413 if (td->file_map)
1414 memset(td->file_map, 0, td->num_maps * sizeof(long));
d32d9284
JA
1415}
1416
f6dcd824
JA
1417static void update_rusage_stat(struct thread_data *td)
1418{
1419 if (!td->runtime)
1420 return;
1421
1422 getrusage(RUSAGE_SELF, &td->ru_end);
1423
1424 td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
1425 td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
1426 td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
1427
1428
1429 memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
1430}
1431
189873de 1432static void *thread_main(void *data)
892199bd 1433{
189873de 1434 struct thread_data *td = data;
02983297 1435 int ret = 1;
892199bd 1436
7292613b 1437 setsid();
892199bd
JA
1438 td->pid = getpid();
1439
99c6704f
JA
1440 if (init_io_u(td))
1441 goto err;
2c83567e 1442
18e0b78c
JA
1443 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1444 td->error = errno;
1445 goto err;
1446 }
1447
43000118
JA
1448 if (td->use_aio && init_aio(td))
1449 goto err;
1450
f737299d 1451 if (td->ioprio) {
892199bd
JA
1452 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1453 td->error = errno;
599002b3 1454 goto err;
892199bd
JA
1455 }
1456 }
1457
1458 sem_post(&startup_sem);
1459 sem_wait(&td->mutex);
43000118 1460
fc097bfe
JA
1461 if (!td->create_serialize && setup_file(td))
1462 goto err;
1463
49d2caab
JA
1464 if (init_random_state(td))
1465 goto err;
1466
293753bb 1467 while (td->loops--) {
f6dcd824
JA
1468 getrusage(RUSAGE_SELF, &td->ru_start);
1469 gettimeofday(&td->start, NULL);
1470 memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
293753bb
JA
1471
1472 if (td->ratemin)
1473 memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
7292613b 1474
d32d9284 1475 clear_io_state(td);
9d0c6ca2 1476 prune_io_piece_log(td);
fd1ae4c9 1477
b2de0ed2 1478 if (!td->use_aio)
b6794fbf 1479 do_sync_io(td);
b2de0ed2
JA
1480 else
1481 do_async_io(td);
1482
91fc5dc9
JA
1483 if (td->error)
1484 break;
1485
f6dcd824
JA
1486 td->runtime += mtime_since_now(&td->start);
1487 update_rusage_stat(td);
1488
b2de0ed2
JA
1489 if (!td->verify)
1490 continue;
cfc702bd 1491
b2de0ed2 1492 clear_io_state(td);
d32d9284 1493
91fc5dc9
JA
1494 if (!td->use_aio)
1495 do_sync_verify(td);
1496 else
1497 do_async_verify(td);
1498
1499 if (td->error)
1500 break;
b6794fbf 1501 }
7292613b 1502
892199bd 1503 ret = 0;
a0a9b35b
JA
1504
1505 if (td->bw_log)
1506 finish_log(td, td->bw_log, "bw");
1507 if (td->lat_log)
1508 finish_log(td, td->lat_log, "lat");
4ac89145 1509
98dd52d6 1510 if (exitall_on_terminate)
27c32a38 1511 terminate_threads(td->groupid);
98dd52d6 1512
892199bd 1513err:
7292613b
JA
1514 if (td->fd != -1) {
1515 close(td->fd);
1516 td->fd = -1;
1517 }
6e2c38cc
JA
1518 if (td->mmap)
1519 munmap(td->mmap, td->file_size);
4ac89145
JA
1520 if (td->use_aio)
1521 cleanup_aio(td);
2c83567e 1522 cleanup_io_u(td);
599002b3 1523 if (ret) {
892199bd 1524 sem_post(&startup_sem);
599002b3
JA
1525 sem_wait(&td->mutex);
1526 }
40ef7f64 1527 td_set_runstate(td, TD_EXITED);
189873de
JA
1528 return NULL;
1529
1530}
1531
1532static void *fork_main(int shm_id, int offset)
1533{
1534 struct thread_data *td;
1535 void *data;
1536
1537 data = shmat(shm_id, NULL, 0);
1538 if (data == (void *) -1) {
1539 perror("shmat");
1540 return NULL;
1541 }
1542
1543 td = data + offset * sizeof(struct thread_data);
1544 thread_main(td);
4240cfa1 1545 shmdt(data);
892199bd
JA
1546 return NULL;
1547}
1548
57d753e3
JA
1549static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1550 double *mean, double *dev)
1551{
1552 double n;
1553
1554 if (is->samples == 0)
1555 return 0;
1556
1557 *min = is->min_val;
1558 *max = is->max_val;
1559
1560 n = (double) is->samples;
1561 *mean = (double) is->val / n;
1562 *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
1563 return 1;
1564}
1565
557e4102
JA
1566static void show_thread_status(struct thread_data *td,
1567 struct group_run_stats *rs)
892199bd
JA
1568{
1569 int prio, prio_class;
f6dcd824 1570 unsigned long min, max, bw = 0;
92b229ed 1571 double mean, dev, usr_cpu, sys_cpu;
892199bd 1572
49d2caab 1573 if (!td->io_bytes && !td->error)
213b446c
JA
1574 return;
1575
892199bd 1576 if (td->runtime)
49d2caab 1577 bw = td->io_bytes / td->runtime;
892199bd
JA
1578
1579 prio = td->ioprio & 0xff;
1580 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1581
f6dcd824 1582 printf("Client%d (g=%d): err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->groupid, td->error, td->io_bytes >> 20, bw, td->runtime);
fd1ae4c9 1583
57d753e3
JA
1584 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1585 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1586 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1587 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
557e4102
JA
1588 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev)) {
1589 double p_of_agg;
1590
1591 p_of_agg = mean * 100 / (double) rs->agg[td->ddir];
1592 printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev);
1593 }
92b229ed
JA
1594
1595 if (td->runtime) {
f6dcd824
JA
1596 usr_cpu = (double) td->usr_time * 100 / (double) td->runtime;
1597 sys_cpu = (double) td->sys_time * 100 / (double) td->runtime;
92b229ed
JA
1598 } else {
1599 usr_cpu = 0;
1600 sys_cpu = 0;
1601 }
1602
f6dcd824 1603 printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
892199bd
JA
1604}
1605
3f39453a 1606static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 1607 int t_rate, int m_rate)
3f39453a 1608{
3f39453a
JA
1609 printf("Threads now running: %d", nr_running);
1610 if (m_rate || t_rate)
1611 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
1612 printf(" : [%s]\r", run_str);
1613 fflush(stdout);
3f39453a
JA
1614}
1615
40ef7f64
JA
1616static void check_str_update(struct thread_data *td, int n, int t, int m)
1617{
1618 char c = run_str[td->thread_number - 1];
1619
1620 if (td->runstate == td->old_runstate)
1621 return;
1622
1623 switch (td->runstate) {
1624 case TD_REAPED:
1625 c = '_';
1626 break;
f4bb2243
JA
1627 case TD_EXITED:
1628 c = 'E';
1629 break;
40ef7f64 1630 case TD_RUNNING:
af678352
JA
1631 if (td_read(td)) {
1632 if (td->sequential)
1633 c = 'R';
1634 else
1635 c = 'r';
1636 } else {
1637 if (td->sequential)
1638 c = 'W';
1639 else
1640 c = 'w';
1641 }
40ef7f64
JA
1642 break;
1643 case TD_VERIFYING:
1644 c = 'V';
1645 break;
1646 case TD_CREATED:
1647 c = 'C';
1648 break;
1649 case TD_NOT_CREATED:
1650 c = 'P';
1651 break;
1652 default:
1653 printf("state %d\n", td->runstate);
1654 }
1655
1656 run_str[td->thread_number - 1] = c;
1657 print_thread_status(td, n, t, m);
1658 td->old_runstate = td->runstate;
1659}
1660
213b446c 1661static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1662{
213b446c 1663 int i;
02bdd9ba 1664
3f39453a
JA
1665 /*
1666 * reap exited threads (TD_EXITED -> TD_REAPED)
1667 */
02bdd9ba
JA
1668 for (i = 0; i < thread_number; i++) {
1669 struct thread_data *td = &threads[i];
1670
40ef7f64
JA
1671 check_str_update(td, *nr_running, *t_rate, *m_rate);
1672
213b446c
JA
1673 if (td->runstate != TD_EXITED)
1674 continue;
02bdd9ba 1675
40ef7f64 1676 td_set_runstate(td, TD_REAPED);
189873de
JA
1677
1678 if (td->use_thread) {
1679 long ret;
1680
1681 if (pthread_join(td->thread, (void *) &ret))
1682 perror("thread_join");
1683 } else
1684 waitpid(td->pid, NULL, 0);
1685
213b446c
JA
1686 (*nr_running)--;
1687 (*m_rate) -= td->ratemin;
1688 (*t_rate) -= td->rate;
40ef7f64 1689 check_str_update(td, *nr_running, *t_rate, *m_rate);
213b446c 1690 }
02bdd9ba
JA
1691}
1692
fc24389f
JA
1693static void run_threads(char *argv[])
1694{
be33abe4 1695 struct timeval genesis;
fc24389f
JA
1696 struct thread_data *td;
1697 unsigned long spent;
2a81240d 1698 int i, todo, nr_running, m_rate, t_rate, nr_started;
fc24389f 1699
fc24389f
JA
1700 printf("Starting %d threads\n", thread_number);
1701 fflush(stdout);
1702
7292613b
JA
1703 signal(SIGINT, sig_handler);
1704
fc24389f 1705 todo = thread_number;
02bdd9ba 1706 nr_running = 0;
2a81240d 1707 nr_started = 0;
213b446c 1708 m_rate = t_rate = 0;
fc24389f 1709
8bdcfab5
JA
1710 for (i = 0; i < thread_number; i++) {
1711 td = &threads[i];
1712
fc097bfe
JA
1713 if (!td->create_serialize)
1714 continue;
1715
8bdcfab5
JA
1716 /*
1717 * do file setup here so it happens sequentially,
1718 * we don't want X number of threads getting their
1719 * client data interspersed on disk
1720 */
1721 if (setup_file(td)) {
40ef7f64 1722 td_set_runstate(td, TD_REAPED);
8bdcfab5
JA
1723 todo--;
1724 }
1725 }
1726
1727 gettimeofday(&genesis, NULL);
1728
213b446c 1729 while (todo) {
3f39453a
JA
1730 /*
1731 * create threads (TD_NOT_CREATED -> TD_CREATED)
1732 */
fc24389f
JA
1733 for (i = 0; i < thread_number; i++) {
1734 td = &threads[i];
1735
02bdd9ba 1736 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1737 continue;
1738
213b446c
JA
1739 /*
1740 * never got a chance to start, killed by other
1741 * thread for some reason
1742 */
1743 if (td->terminate) {
1744 todo--;
1745 continue;
1746 }
1747
fc24389f 1748 if (td->start_delay) {
be33abe4 1749 spent = mtime_since_now(&genesis);
fc24389f
JA
1750
1751 if (td->start_delay * 1000 > spent)
1752 continue;
1753 }
1754
2a81240d 1755 if (td->stonewall && (nr_started || nr_running))
ea6f96a2 1756 break;
2a81240d 1757
40ef7f64
JA
1758 td_set_runstate(td, TD_CREATED);
1759 check_str_update(td, nr_running, t_rate, m_rate);
fc24389f
JA
1760 sem_init(&startup_sem, 1, 1);
1761 todo--;
2a81240d 1762 nr_started++;
fc24389f 1763
189873de
JA
1764 if (td->use_thread) {
1765 if (pthread_create(&td->thread, NULL, thread_main, td)) {
1766 perror("thread_create");
1767 nr_started--;
1768 }
1769 } else {
1770 if (fork())
1771 sem_wait(&startup_sem);
1772 else {
1773 fork_main(shm_id, i);
1774 exit(0);
1775 }
fc24389f
JA
1776 }
1777 }
1778
3f39453a 1779 /*
e8457004 1780 * start created threads (TD_CREATED -> TD_RUNNING)
3f39453a 1781 */
fc24389f
JA
1782 for (i = 0; i < thread_number; i++) {
1783 struct thread_data *td = &threads[i];
1784
3f39453a
JA
1785 if (td->runstate != TD_CREATED)
1786 continue;
1787
40ef7f64 1788 td_set_runstate(td, TD_RUNNING);
3f39453a 1789 nr_running++;
2a81240d 1790 nr_started--;
3f39453a
JA
1791 m_rate += td->ratemin;
1792 t_rate += td->rate;
40ef7f64 1793 check_str_update(td, nr_running, t_rate, m_rate);
3f39453a 1794 sem_post(&td->mutex);
fc24389f
JA
1795 }
1796
e8457004
JA
1797 for (i = 0; i < thread_number; i++) {
1798 struct thread_data *td = &threads[i];
1799
b48889bb
JA
1800 if (td->runstate != TD_RUNNING &&
1801 td->runstate != TD_VERIFYING)
e8457004
JA
1802 continue;
1803
40ef7f64 1804 check_str_update(td, nr_running, t_rate, m_rate);
e8457004
JA
1805 }
1806
213b446c 1807 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1808
fc24389f
JA
1809 if (todo)
1810 usleep(100000);
1811 }
02bdd9ba
JA
1812
1813 while (nr_running) {
213b446c 1814 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1815 usleep(10000);
1816 }
fc24389f
JA
1817}
1818
0d80f40d 1819static void show_group_stats(struct group_run_stats *rs, int id)
8867c0a8 1820{
0d80f40d
JA
1821 printf("\nRun status group %d:\n", id);
1822
1823 if (rs->max_run[DDIR_READ])
1824 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[0], rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]);
1825 if (rs->max_run[DDIR_WRITE])
1826 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[1], rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]);
1827}
1828
1829static void show_run_stats(void)
1830{
1831 struct group_run_stats *runstats, *rs;
557e4102 1832 struct thread_data *td;
8867c0a8
JA
1833 int i;
1834
0d80f40d
JA
1835 runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
1836
1837 for (i = 0; i < groupid + 1; i++) {
1838 rs = &runstats[i];
1839
f6dcd824 1840 memset(rs, 0, sizeof(*rs));
0d80f40d
JA
1841 rs->min_bw[0] = rs->min_run[0] = ~0UL;
1842 rs->min_bw[1] = rs->min_run[1] = ~0UL;
0d80f40d
JA
1843 }
1844
1845 for (i = 0; i < thread_number; i++) {
0d80f40d
JA
1846 unsigned long bw = 0;
1847
557e4102
JA
1848 td = &threads[i];
1849
1850 if (td->error)
1851 continue;
1852
0d80f40d
JA
1853 rs = &runstats[td->groupid];
1854
557e4102
JA
1855 if (td->runtime < rs->min_run[td->ddir])
1856 rs->min_run[td->ddir] = td->runtime;
1857 if (td->runtime > rs->max_run[td->ddir])
1858 rs->max_run[td->ddir] = td->runtime;
0d80f40d 1859
557e4102
JA
1860 if (td->runtime)
1861 bw = td->io_bytes / td->runtime;
1862 if (bw < rs->min_bw[td->ddir])
1863 rs->min_bw[td->ddir] = bw;
1864 if (bw > rs->max_bw[td->ddir])
1865 rs->max_bw[td->ddir] = bw;
0d80f40d 1866
557e4102 1867 rs->io_mb[td->ddir] += td->io_bytes >> 20;
0d80f40d 1868 }
9d489c62 1869
0d80f40d
JA
1870 for (i = 0; i < groupid + 1; i++) {
1871 rs = &runstats[i];
1872
1873 if (rs->max_run[0])
1874 rs->agg[0] = (rs->io_mb[0]*1024*1000) / rs->max_run[0];
1875 if (rs->max_run[1])
1876 rs->agg[1] = (rs->io_mb[1]*1024*1000) / rs->max_run[1];
0d80f40d 1877 }
557e4102
JA
1878
1879 for (i = 0; i < thread_number; i++) {
1880 td = &threads[i];
1881 rs = &runstats[td->groupid];
1882
c4c8f7b3 1883 show_thread_status(td, rs);
557e4102 1884 }
9d489c62
JA
1885
1886 for (i = 0; i < groupid + 1; i++)
1887 show_group_stats(&runstats[i], i);
0d80f40d
JA
1888}
1889
1890int main(int argc, char *argv[])
1891{
27c32a38 1892 memset(run_str, 0, sizeof(run_str));
5961d92c 1893
27c32a38 1894 if (parse_options(argc, argv))
5961d92c 1895 return 1;
7dd1389e 1896
4240cfa1
JA
1897 if (!thread_number) {
1898 printf("Nothing to do\n");
1899 return 1;
1900 }
7dd1389e 1901
fc24389f 1902 run_threads(argv);
0d80f40d 1903 show_run_stats();
fc24389f 1904
892199bd
JA
1905 return 0;
1906}