[PATCH] fio: various fixes for using a bdev directly
[disktools.git] / fio.c
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
29#include <math.h>
30#include <assert.h>
31#include <pthread.h>
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/wait.h>
35#include <sys/ipc.h>
36#include <sys/shm.h>
37#include <sys/ioctl.h>
38#include <asm/unistd.h>
39
40#include "fio.h"
41
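/*
 * io buffers are aligned to a 4096 byte boundary: ALIGN rounds an address
 * up to the next page, eg 0x1001 -> 0x2000, as the O_DIRECT case expects
 */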
42#define MASK (4095)
43
44#define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
45
46int groupid = 0;
47int thread_number = 0;
48char run_str[MAX_JOBS + 1];
49int shm_id = 0;
50
51/*
52 * thread life cycle
53 */
54enum {
55 TD_NOT_CREATED = 0,
56 TD_CREATED,
57 TD_RUNNING,
58 TD_VERIFYING,
59 TD_EXITED,
60 TD_REAPED,
61};
62
63/*
64 * The io unit
65 */
66struct io_u {
67 struct iocb iocb;
68 struct timeval start_time;
69 struct timeval issue_time;
70
71 char *buf;
72 unsigned int buflen;
73 unsigned long long offset;
74
75 struct list_head list;
76};
77
78#define should_fsync(td) (td_write(td) && !(td)->odirect)
79
80static sem_t startup_sem;
81
82#define TERMINATE_ALL (-1)
83
84static void terminate_threads(int groupid)
85{
86 int i;
87
88 for (i = 0; i < thread_number; i++) {
89 struct thread_data *td = &threads[i];
90
91 if (groupid == TERMINATE_ALL || groupid == td->groupid) {
92 td->terminate = 1;
93 td->start_delay = 0;
94 }
95 }
96}
97
98static void sig_handler(int sig)
99{
100 terminate_threads(TERMINATE_ALL);
101}
102
103static unsigned long utime_since(struct timeval *s, struct timeval *e)
104{
105 double sec, usec;
106
107 sec = e->tv_sec - s->tv_sec;
108 usec = e->tv_usec - s->tv_usec;
109 if (sec > 0 && usec < 0) {
110 sec--;
111 usec += 1000000;
112 }
113
114 sec *= (double) 1000000;
115
116 return sec + usec;
117}
118
119static unsigned long utime_since_now(struct timeval *s)
120{
121 struct timeval t;
122
123 gettimeofday(&t, NULL);
124 return utime_since(s, &t);
125}
126
127static unsigned long mtime_since(struct timeval *s, struct timeval *e)
128{
129 double sec, usec;
130
131 sec = e->tv_sec - s->tv_sec;
132 usec = e->tv_usec - s->tv_usec;
133 if (sec > 0 && usec < 0) {
134 sec--;
135 usec += 1000000;
136 }
137
138 sec *= (double) 1000;
139 usec /= (double) 1000;
140
141 return sec + usec;
142}
143
144static unsigned long mtime_since_now(struct timeval *s)
145{
146 struct timeval t;
147
148 gettimeofday(&t, NULL);
149 return mtime_since(s, &t);
150}
151
152static inline unsigned long msec_now(struct timeval *s)
153{
154 return s->tv_sec * 1000 + s->tv_usec / 1000;
155}
156
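/*
 * the random map tracks which min_bs sized blocks have already been
 * touched, one bit per block, so random io can cover the whole io range
 */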
157static int random_map_free(struct thread_data *td, unsigned long long block)
158{
159 unsigned int idx = RAND_MAP_IDX(td, block);
160 unsigned int bit = RAND_MAP_BIT(td, block);
161
162 return (td->file_map[idx] & (1UL << bit)) == 0;
163}
164
165static int get_next_free_block(struct thread_data *td, unsigned long long *b)
892199bd 166{
167 int i;
168
169 *b = 0;
170 i = 0;
171 while ((*b) * td->min_bs < td->io_size) {
172 if (td->file_map[i] != -1UL) {
173 *b += ffz(td->file_map[i]);
174 return 0;
175 }
176
177 *b += BLOCKS_PER_MAP;
178 i++;
179 }
180
181 return 1;
182}
183
184static void mark_random_map(struct thread_data *td, struct io_u *io_u)
185{
186 unsigned long block = io_u->offset / td->min_bs;
187 unsigned int blocks = 0;
188
189 while (blocks < (io_u->buflen / td->min_bs)) {
190 int idx, bit;
191
192 if (!random_map_free(td, block))
193 break;
194
195 idx = RAND_MAP_IDX(td, block);
196 bit = RAND_MAP_BIT(td, block);
197
198 assert(idx < td->num_maps);
199
200 td->file_map[idx] |= (1UL << bit);
201 block++;
202 blocks++;
203 }
204
205 if ((blocks * td->min_bs) < io_u->buflen)
206 io_u->buflen = blocks * td->min_bs;
207}
208
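/*
 * pick the next offset: for random io, draw block numbers until a free
 * block is found (up to 50 tries), then fall back to scanning the map
 * linearly; sequential io just continues where the last io left off
 */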
209static int get_next_offset(struct thread_data *td, unsigned long long *offset)
210{
211 unsigned long long b, rb;
212 long r;
213
214 if (!td->sequential) {
215 unsigned long max_blocks = td->io_size / td->min_bs;
216 int loops = 50;
217
218 do {
219 lrand48_r(&td->random_state, &r);
220 b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
221 rb = b + (td->file_offset / td->min_bs);
222 loops--;
223 } while (!random_map_free(td, rb) && loops);
224
225 if (!loops) {
226 if (get_next_free_block(td, &b))
227 return 1;
228 }
229 } else
230 b = td->last_bytes / td->min_bs;
231
232 *offset = (b * td->min_bs) + td->file_offset;
233 if (*offset > td->file_size)
234 return 1;
235
236 return 0;
237}
238
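/*
 * pick a buffer length between min_bs and max_bs, rounded up to a min_bs
 * multiple and clamped to the io still missing from this run
 */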
239static unsigned int get_next_buflen(struct thread_data *td)
240{
241 unsigned int buflen;
d32d9284 242 long r;
243
244 if (td->min_bs == td->max_bs)
245 buflen = td->min_bs;
246 else {
247 lrand48_r(&td->bsrange_state, &r);
248 buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
249 buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
250 }
251
252 if (buflen > td->io_size - td->this_io_bytes)
253 buflen = td->io_size - td->this_io_bytes;
254
255 return buflen;
256}
257
258static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
259 unsigned long val)
892199bd 260{
261 if (val > is->max_val)
262 is->max_val = val;
263 if (val < is->min_val)
264 is->min_val = val;
265
266 is->val += val;
267 is->val_sq += val * val;
268 is->samples++;
269}
270
271static void add_log_sample(struct thread_data *td, struct io_log *log,
272 unsigned long val)
273{
274 if (log->nr_samples == log->max_samples) {
275 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
276
277 log->log = realloc(log->log, new_size);
278 log->max_samples <<= 1;
279 }
280
281 log->log[log->nr_samples].val = val;
282 log->log[log->nr_samples].time = mtime_since_now(&td->start);
283 log->nr_samples++;
284}
285
286static void add_clat_sample(struct thread_data *td, unsigned long msec)
287{
288 add_stat_sample(td, &td->clat_stat, msec);
289
290 if (td->lat_log)
291 add_log_sample(td, td->lat_log, msec);
292}
293
294static void add_slat_sample(struct thread_data *td, unsigned long msec)
295{
296 add_stat_sample(td, &td->slat_stat, msec);
297}
298
299static void add_bw_sample(struct thread_data *td)
300{
301 unsigned long spent = mtime_since_now(&td->stat_sample_time);
302 unsigned long rate;
303
304 if (spent < td->bw_avg_time)
305 return;
306
307 rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
308 add_stat_sample(td, &td->bw_stat, rate);
309
310 if (td->bw_log)
311 add_log_sample(td, td->bw_log, rate);
312
313 gettimeofday(&td->stat_sample_time, NULL);
314 td->stat_io_bytes = td->this_io_bytes;
315}
316
fd11d7af
JA
317/*
318 * busy looping version for the last few usec
319 */
320static void __usec_sleep(int usec)
321{
322 struct timeval start;
323
324 gettimeofday(&start, NULL);
325 while (utime_since_now(&start) < usec)
3782a8cd 326 nop;
fd11d7af
JA
327}
328
5c24b2c4 329static void usec_sleep(int usec)
892199bd 330{
86184d14
JA
331 struct timespec req = { .tv_sec = 0, .tv_nsec = usec * 1000 };
332 struct timespec rem;
892199bd
JA
333
334 do {
fd11d7af
JA
335 if (usec < 5000) {
336 __usec_sleep(usec);
337 break;
338 }
86184d14
JA
339 rem.tv_sec = rem.tv_nsec = 0;
340 nanosleep(&req, &rem);
341 if (!rem.tv_nsec)
892199bd 342 break;
86184d14
JA
343
344 req.tv_nsec = rem.tv_nsec;
345 usec = rem.tv_nsec / 1000;
892199bd
JA
346 } while (1);
347}
348
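/*
 * if a fixed rate is requested, sleep off the time we are ahead of it;
 * sleeps are batched until at least 100 msec of delay has accumulated
 */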
349static void rate_throttle(struct thread_data *td, unsigned long time_spent,
350 unsigned int bytes)
351{
9e850933
JA
352 unsigned long usec_cycle;
353
4240cfa1
JA
354 if (!td->rate)
355 return;
356
9e850933
JA
357 usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
358
359 if (time_spent < usec_cycle) {
360 unsigned long s = usec_cycle - time_spent;
86184d14
JA
361
362 td->rate_pending_usleep += s;
fad86e6a 363 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
364 usec_sleep(td->rate_pending_usleep);
365 td->rate_pending_usleep = 0;
366 }
4240cfa1 367 } else {
9e850933 368 long overtime = time_spent - usec_cycle;
42b2b9fe 369
4240cfa1
JA
370 td->rate_pending_usleep -= overtime;
371 }
372}
373
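/*
 * check that the job keeps up with the requested minimum rate, sampled
 * once per ratecycle msecs after an initial two second settle period
 */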
374static int check_min_rate(struct thread_data *td, struct timeval *now)
375{
376 unsigned long spent;
4240cfa1
JA
377 unsigned long rate;
378
379 /*
380 * allow a 2 second settle period in the beginning
381 */
7607bc6b 382 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
383 return 0;
384
385 /*
386 * if rate blocks is set, sample is running
387 */
49d2caab 388 if (td->rate_bytes) {
4240cfa1
JA
389 spent = mtime_since(&td->lastrate, now);
390 if (spent < td->ratecycle)
391 return 0;
392
49d2caab 393 rate = (td->this_io_bytes - td->rate_bytes) / spent;
4240cfa1
JA
394 if (rate < td->ratemin) {
395 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 396 if (rate_quit)
27c32a38 397 terminate_threads(td->groupid);
4240cfa1
JA
398 return 1;
399 }
86184d14 400 }
4240cfa1 401
49d2caab 402 td->rate_bytes = td->this_io_bytes;
4240cfa1
JA
403 memcpy(&td->lastrate, now, sizeof(*now));
404 return 0;
86184d14
JA
405}
406
67903a2e
JA
407static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
408{
01f79976
JA
409 if (!td->timeout)
410 return 0;
67903a2e
JA
411 if (mtime_since(&td->start, t) >= td->timeout * 1000)
412 return 1;
413
414 return 0;
415}
416
e8457004
JA
417static void fill_random_bytes(struct thread_data *td,
418 unsigned char *p, unsigned int len)
419{
645785e5 420 unsigned int todo;
40ef7f64 421 double r;
e8457004
JA
422
423 while (len) {
40ef7f64 424 drand48_r(&td->verify_state, &r);
e8457004 425
40ef7f64
JA
426 /*
427 * lrand48_r seems to be broken and only fill the bottom
428 * 32-bits, even on 64-bit archs with 64-bit longs
429 */
430 todo = sizeof(r);
e8457004
JA
431 if (todo > len)
432 todo = len;
433
434 memcpy(p, &r, todo);
435
436 len -= todo;
437 p += todo;
438 }
439}
440
9d0c6ca2
JA
441static void hexdump(void *buffer, int len)
442{
443 unsigned char *p = buffer;
444 int i;
445
446 for (i = 0; i < len; i++)
447 printf("%02x", p[i]);
448 printf("\n");
449}
450
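/*
 * check a read buffer against the header and md5 sum that populate_io_u()
 * stored in it when the block was originally written
 */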
451static int verify_io_u(struct io_u *io_u)
e8457004
JA
452{
453 struct verify_header *hdr = (struct verify_header *) io_u->buf;
454 unsigned char *p = (unsigned char *) io_u->buf;
455 struct md5_ctx md5_ctx;
9d0c6ca2 456 int ret;
e8457004 457
840b216f 458 if (hdr->fio_magic != FIO_HDR_MAGIC)
e8457004
JA
459 return 1;
460
461 memset(&md5_ctx, 0, sizeof(md5_ctx));
462 p += sizeof(*hdr);
463 md5_update(&md5_ctx, p, hdr->len - sizeof(*hdr));
464
9d0c6ca2
JA
465 ret = memcmp(hdr->md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
466 if (ret) {
467 hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
468 hexdump(md5_ctx.hash, sizeof(md5_ctx.hash));
469 }
470
471 return ret;
e8457004
JA
472}
473
cfc702bd
JA
474/*
475 * fill body of io_u->buf with random data and add a header with the
476 * (eg) sha1sum of that data.
477 */
e8457004 478static void populate_io_u(struct thread_data *td, struct io_u *io_u)
cfc702bd 479{
e8457004
JA
480 struct md5_ctx md5_ctx;
481 struct verify_header hdr;
482 unsigned char *p = (unsigned char *) io_u->buf;
483
484 hdr.fio_magic = FIO_HDR_MAGIC;
485 hdr.len = io_u->buflen;
486 p += sizeof(hdr);
487 fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
488
489 memset(&md5_ctx, 0, sizeof(md5_ctx));
490 md5_update(&md5_ctx, p, io_u->buflen - sizeof(hdr));
491 memcpy(hdr.md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
492 memcpy(io_u->buf, &hdr, sizeof(hdr));
cfc702bd
JA
493}
494
2c83567e
JA
495static void put_io_u(struct thread_data *td, struct io_u *io_u)
496{
497 list_del(&io_u->list);
498 list_add(&io_u->list, &td->io_u_freelist);
499 td->cur_depth--;
500}
501
f0f3411b
JA
502#define queue_full(td) (list_empty(&(td)->io_u_freelist))
503
e8457004
JA
504static struct io_u *__get_io_u(struct thread_data *td)
505{
506 struct io_u *io_u;
507
f0f3411b 508 if (queue_full(td))
e8457004
JA
509 return NULL;
510
511 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
512 list_del(&io_u->list);
513 list_add(&io_u->list, &td->io_u_busylist);
f4bb2243 514 td->cur_depth++;
e8457004
JA
515 return io_u;
516}
517
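/*
 * set up a complete io unit: pick offset and length, mark the random
 * map, fill in the verify header if needed and prep the iocb for aio
 */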
518static struct io_u *get_io_u(struct thread_data *td)
519{
520 struct io_u *io_u;
521
e8457004
JA
522 io_u = __get_io_u(td);
523 if (!io_u)
2c83567e
JA
524 return NULL;
525
406e7b7c
JA
526 if (get_next_offset(td, &io_u->offset)) {
527 put_io_u(td, io_u);
49d2caab 528 return NULL;
406e7b7c 529 }
49d2caab 530
b2a369fb
JA
531 io_u->buflen = get_next_buflen(td);
532 if (!io_u->buflen) {
e8457004 533 put_io_u(td, io_u);
7889f07b 534 return NULL;
e8457004 535 }
2c83567e 536
75b2ab2c
JA
537 if (io_u->buflen + io_u->offset > td->file_size)
538 io_u->buflen = td->file_size - io_u->offset;
49d2caab
JA
539
540 if (!td->sequential)
541 mark_random_map(td, io_u);
542
543 td->last_bytes += io_u->buflen;
544
9d0c6ca2 545 if (td->verify)
e8457004 546 populate_io_u(td, io_u);
cfc702bd 547
2c83567e
JA
548 if (td->use_aio) {
549 if (td_read(td))
550 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
551 else
552 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
553 }
554
57d753e3 555 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
556 return io_u;
557}
558
40ef7f64
JA
559static inline void td_set_runstate(struct thread_data *td, int runstate)
560{
561 td->old_runstate = td->runstate;
562 td->runstate = runstate;
563}
564
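/*
 * pull the next entry off the write history log, returning the offset
 * and length of the block to read back and verify
 */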
565static int get_next_verify(struct thread_data *td,
566 unsigned long long *offset, unsigned int *len)
567{
568 struct io_piece *ipo;
569
570 if (list_empty(&td->io_hist_list))
571 return 1;
572
573 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
574 list_del(&ipo->list);
575
576 *offset = ipo->offset;
577 *len = ipo->len;
578 free(ipo);
579 return 0;
580}
581
9d0c6ca2
JA
582static void prune_io_piece_log(struct thread_data *td)
583{
584 struct io_piece *ipo;
585
586 while (!list_empty(&td->io_hist_list)) {
587 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
588
589 list_del(&ipo->list);
590 free(ipo);
591 }
592}
593
594/*
595 * log a successful write, so we can unwind the log for verify
596 */
597static void log_io_piece(struct thread_data *td, struct io_u *io_u)
598{
49d2caab 599 struct io_piece *ipo = malloc(sizeof(struct io_piece));
9d0c6ca2
JA
600 struct list_head *entry;
601
602 INIT_LIST_HEAD(&ipo->list);
603 ipo->offset = io_u->offset;
604 ipo->len = io_u->buflen;
605
49d2caab
JA
606 /*
607 * for random io where the writes extend the file, it will typically
608 * be laid out with the blocks scattered as written. it's faster to
609 * read them back in that order again, so don't sort
610 */
611 if (td->sequential || !td->overwrite) {
9d0c6ca2
JA
612 list_add_tail(&ipo->list, &td->io_hist_list);
613 return;
614 }
615
616 /*
617 * for random io, sort the list so verify will run faster
618 */
619 entry = &td->io_hist_list;
620 while ((entry = entry->prev) != &td->io_hist_list) {
621 struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
622
9d0c6ca2
JA
623 if (__ipo->offset < ipo->offset)
624 break;
625 }
626
627 list_add(&ipo->list, entry);
628}
629
91fc5dc9 630static void do_sync_verify(struct thread_data *td)
cfc702bd 631{
40ef7f64 632 struct timeval t;
e8457004 633 struct io_u *io_u = NULL;
645785e5 634 int ret;
e8457004 635
40ef7f64 636 td_set_runstate(td, TD_VERIFYING);
e8457004
JA
637
638 io_u = __get_io_u(td);
639
40ef7f64 640 if (!td->odirect) {
49d2caab 641 if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
40ef7f64
JA
642 td->error = errno;
643 goto out;
644 }
645 }
646
e8457004
JA
647 do {
648 if (td->terminate)
649 break;
40ef7f64
JA
650
651 gettimeofday(&t, NULL);
652 if (runtime_exceeded(td, &t))
653 break;
654
645785e5
JA
655 if (get_next_verify(td, &io_u->offset, &io_u->buflen))
656 break;
657
658 if (td->cur_off != io_u->offset) {
659 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
660 td->error = errno;
661 break;
662 }
663 }
e8457004
JA
664
665 ret = read(td->fd, io_u->buf, io_u->buflen);
666 if (ret < (int) io_u->buflen) {
667 if (ret == -1) {
668 td->error = errno;
669 break;
670 } else if (!ret)
671 break;
672 else
673 io_u->buflen = ret;
674 }
675
645785e5 676 if (verify_io_u(io_u))
e8457004
JA
677 break;
678
645785e5 679 td->cur_off = io_u->offset + io_u->buflen;
e8457004
JA
680 } while (1);
681
682out:
40ef7f64 683 td_set_runstate(td, TD_RUNNING);
e8457004 684 put_io_u(td, io_u);
cfc702bd
JA
685}
686
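/*
 * sync io engine: seek if needed, issue one read/write at a time, and
 * apply rate throttling, latency/bw stats and periodic fsyncs
 */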
687static void do_sync_io(struct thread_data *td)
688{
689 unsigned long msec, usec;
690 struct io_u *io_u = NULL;
691 struct timeval e;
692
693 while (td->this_io_bytes < td->io_size) {
892199bd
JA
694 int ret;
695
696 if (td->terminate)
697 break;
698
2c83567e 699 io_u = get_io_u(td);
7889f07b
JA
700 if (!io_u)
701 break;
2c83567e 702
63a09e51
JA
703 if (td->cur_off != io_u->offset) {
704 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
705 td->error = errno;
706 break;
707 }
892199bd
JA
708 }
709
02983297 710 if (td_read(td))
2c83567e 711 ret = read(td->fd, io_u->buf, io_u->buflen);
892199bd 712 else
2c83567e 713 ret = write(td->fd, io_u->buf, io_u->buflen);
892199bd 714
2c83567e 715 if (ret < (int) io_u->buflen) {
892199bd
JA
716 if (ret == -1)
717 td->error = errno;
718 break;
719 }
720
62bb4285 721 if (td_write(td))
645785e5
JA
722 log_io_piece(td, io_u);
723
4240cfa1 724 td->io_blocks++;
49d2caab
JA
725 td->io_bytes += io_u->buflen;
726 td->this_io_bytes += io_u->buflen;
63a09e51 727 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1 728
86184d14
JA
729 gettimeofday(&e, NULL);
730
57d753e3 731 usec = utime_since(&io_u->start_time, &e);
86184d14 732
9e850933 733 rate_throttle(td, usec, io_u->buflen);
892199bd 734
4240cfa1
JA
735 if (check_min_rate(td, &e)) {
736 td->error = ENODATA;
737 break;
738 }
892199bd 739
4240cfa1 740 msec = usec / 1000;
57d753e3 741 add_clat_sample(td, msec);
645785e5 742 add_bw_sample(td);
67903a2e
JA
743
744 if (runtime_exceeded(td, &e))
745 break;
2c83567e 746
cdf92433 747 put_io_u(td, io_u);
e8457004 748 io_u = NULL;
cdf92433 749
e97712ed
JA
750 if (td->thinktime)
751 usec_sleep(td->thinktime);
752
cdf92433
JA
753 if (should_fsync(td) && td->fsync_blocks &&
754 (td->io_blocks % td->fsync_blocks) == 0)
755 fsync(td->fd);
892199bd
JA
756 }
757
e8457004
JA
758 if (io_u)
759 put_io_u(td, io_u);
760
4240cfa1 761 if (should_fsync(td))
892199bd 762 fsync(td->fd);
892199bd 763}
43000118 764
1ad72b11
JA
765static int io_u_getevents(struct thread_data *td, int min, int max,
766 struct timespec *t)
767{
768 int r;
769
770 do {
771 r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
772 if (r != -EAGAIN && r != -EINTR)
773 break;
774 } while (1);
775
776 return r;
777}
778
2c83567e 779static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 780{
2c83567e 781 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
782 int ret;
783
784 do {
254605cd 785 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
786 if (ret == 1)
787 return 0;
406e7b7c 788 else if (ret == -EAGAIN)
56b0eff0 789 usleep(100);
406e7b7c 790 else if (ret == -EINTR)
a592bd33 791 continue;
56b0eff0
JA
792 else
793 break;
794 } while (1);
795
a592bd33 796 return ret;
56b0eff0
JA
797}
798
98168d55 799#define iocb_time(iocb) ((unsigned long) (iocb)->data)
2c83567e
JA
800#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
801
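/*
 * reap nr completed aio events, update stats and the write log, and
 * return the number of bytes that finished
 */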
802static int ios_completed(struct thread_data *td, int nr)
2c83567e
JA
803{
804 unsigned long msec;
805 struct io_u *io_u;
806 struct timeval e;
9e850933 807 int i, bytes_done;
2c83567e 808
f0f3411b 809 gettimeofday(&e, NULL);
2c83567e 810
9e850933 811 for (i = 0, bytes_done = 0; i < nr; i++) {
2c83567e
JA
812 io_u = ev_to_iou(td->aio_events + i);
813
f0f3411b 814 td->io_blocks++;
49d2caab
JA
815 td->io_bytes += io_u->buflen;
816 td->this_io_bytes += io_u->buflen;
8c033f93 817
f0f3411b 818 msec = mtime_since(&io_u->issue_time, &e);
2c83567e 819
f0f3411b 820 add_clat_sample(td, msec);
645785e5
JA
821 add_bw_sample(td);
822
62bb4285 823 if (td_write(td))
645785e5 824 log_io_piece(td, io_u);
2c83567e 825
f4bb2243 826 bytes_done += io_u->buflen;
2c83567e
JA
827 put_io_u(td, io_u);
828 }
9e850933
JA
829
830 return bytes_done;
2c83567e
JA
831}
832
833static void cleanup_pending_aio(struct thread_data *td)
834{
835 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
836 struct list_head *entry, *n;
837 struct io_u *io_u;
838 int r;
839
840 /*
841 * get immediately available events, if any
842 */
1ad72b11 843 r = io_u_getevents(td, 0, td->cur_depth, &ts);
2c83567e 844 if (r > 0)
f0f3411b 845 ios_completed(td, r);
2c83567e
JA
846
847 /*
848 * now cancel remaining active events
849 */
850 list_for_each_safe(entry, n, &td->io_u_busylist) {
851 io_u = list_entry(entry, struct io_u, list);
852
853 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
854 if (!r)
855 put_io_u(td, io_u);
856 }
857
858 if (td->cur_depth) {
1ad72b11 859 r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
2c83567e 860 if (r > 0)
f0f3411b 861 ios_completed(td, r);
2c83567e
JA
862 }
863}
98168d55 864
d32d9284
JA
865static int async_do_verify(struct thread_data *td, struct io_u **io_u)
866{
867 struct io_u *v_io_u = *io_u;
868 int ret = 0;
869
870 if (v_io_u) {
645785e5 871 ret = verify_io_u(v_io_u);
d32d9284
JA
872 put_io_u(td, v_io_u);
873 *io_u = NULL;
874 }
875
876 return ret;
877}
878
91fc5dc9 879static void do_async_verify(struct thread_data *td)
cfc702bd 880{
f4bb2243 881 struct timeval t;
d32d9284 882 struct io_u *io_u, *v_io_u = NULL;
645785e5 883 int ret;
f4bb2243
JA
884
885 td_set_runstate(td, TD_VERIFYING);
886
f4bb2243
JA
887 do {
888 if (td->terminate)
889 break;
890
891 gettimeofday(&t, NULL);
892 if (runtime_exceeded(td, &t))
893 break;
894
895 io_u = __get_io_u(td);
896 if (!io_u)
897 break;
898
645785e5
JA
899 if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
900 put_io_u(td, io_u);
901 break;
f4bb2243
JA
902 }
903
904 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
905 ret = io_u_queue(td, io_u);
906 if (ret) {
907 put_io_u(td, io_u);
908 td->error = ret;
909 break;
910 }
911
912 /*
913 * we have one pending to verify, do that while we are
914 * doing io on the next one
915 */
d32d9284
JA
916 if (async_do_verify(td, &v_io_u))
917 break;
f0f3411b 918
1ad72b11 919 ret = io_u_getevents(td, 1, 1, NULL);
f0f3411b
JA
920 if (ret != 1) {
921 if (ret < 0)
922 td->error = ret;
f4bb2243
JA
923 break;
924 }
925
f0f3411b 926 v_io_u = ev_to_iou(td->aio_events);
f4bb2243 927
645785e5 928 td->cur_off = v_io_u->offset + v_io_u->buflen;
f0f3411b
JA
929
930 /*
d32d9284 931 * if we can't submit more io, we need to verify now
f0f3411b 932 */
d32d9284
JA
933 if (queue_full(td) && async_do_verify(td, &v_io_u))
934 break;
935
f4bb2243
JA
936 } while (1);
937
d32d9284 938 async_do_verify(td, &v_io_u);
f0f3411b 939
f4bb2243
JA
940 if (td->cur_depth)
941 cleanup_pending_aio(td);
942
943 td_set_runstate(td, TD_RUNNING);
cfc702bd
JA
944}
945
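/*
 * aio engine: keep up to aio_depth ios in flight, reaping completions
 * in batches and throttling on the completed byte count
 */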
946static void do_async_io(struct thread_data *td)
947{
948 struct timeval s, e;
7889f07b 949 unsigned long usec;
43000118 950
49d2caab 951 while (td->this_io_bytes < td->io_size) {
43000118
JA
952 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
953 struct timespec *timeout;
2c83567e
JA
954 int ret, min_evts = 0;
955 struct io_u *io_u;
9e850933 956 unsigned int bytes_done;
43000118
JA
957
958 if (td->terminate)
959 break;
960
2c83567e 961 io_u = get_io_u(td);
7889f07b
JA
962 if (!io_u)
963 break;
43000118 964
57d753e3 965 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 966
2c83567e 967 ret = io_u_queue(td, io_u);
56b0eff0 968 if (ret) {
a3fdb993 969 put_io_u(td, io_u);
a592bd33 970 td->error = ret;
43000118
JA
971 break;
972 }
973
57d753e3
JA
974 gettimeofday(&io_u->issue_time, NULL);
975 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
2c83567e 976 if (td->cur_depth < td->aio_depth) {
43000118
JA
977 timeout = &ts;
978 min_evts = 0;
979 } else {
980 timeout = NULL;
981 min_evts = 1;
982 }
983
1ad72b11 984 ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
43000118 985 if (ret < 0) {
406e7b7c 986 td->error = ret;
43000118
JA
987 break;
988 } else if (!ret)
989 continue;
990
f0f3411b 991 bytes_done = ios_completed(td, ret);
43000118 992
98168d55
JA
993 /*
994 * the rate is batched for now, it should work for batches
995 * of completions except the very first one which may look
996 * a little bursty
997 */
2c83567e 998 gettimeofday(&e, NULL);
43000118
JA
999 usec = utime_since(&s, &e);
1000
9e850933 1001 rate_throttle(td, usec, bytes_done);
43000118
JA
1002
1003 if (check_min_rate(td, &e)) {
1004 td->error = ENODATA;
1005 break;
1006 }
67903a2e
JA
1007
1008 if (runtime_exceeded(td, &e))
1009 break;
765d9223
JA
1010
1011 if (td->thinktime)
1012 usec_sleep(td->thinktime);
cdf92433
JA
1013
1014 if (should_fsync(td) && td->fsync_blocks &&
1015 (td->io_blocks % td->fsync_blocks) == 0)
1016 fsync(td->fd);
43000118 1017 }
56b0eff0 1018
2c83567e
JA
1019 if (td->cur_depth)
1020 cleanup_pending_aio(td);
4ac89145
JA
1021
1022 if (should_fsync(td))
1023 fsync(td->fd);
56b0eff0
JA
1024}
1025
1026static void cleanup_aio(struct thread_data *td)
1027{
254605cd
JA
1028 io_destroy(td->aio_ctx);
1029
43000118
JA
1030 if (td->aio_events)
1031 free(td->aio_events);
43000118
JA
1032}
1033
1034static int init_aio(struct thread_data *td)
1035{
254605cd 1036 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
1037 td->error = errno;
1038 return 1;
1039 }
1040
43000118 1041 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
1042 return 0;
1043}
1044
2c83567e
JA
1045static void cleanup_io_u(struct thread_data *td)
1046{
1047 struct list_head *entry, *n;
1048 struct io_u *io_u;
1049
1050 list_for_each_safe(entry, n, &td->io_u_freelist) {
1051 io_u = list_entry(entry, struct io_u, list);
1052
1053 list_del(&io_u->list);
2c83567e
JA
1054 free(io_u);
1055 }
6b71c826 1056
99c6704f
JA
1057 if (td->mem_type == MEM_MALLOC)
1058 free(td->orig_buffer);
1059 else if (td->mem_type == MEM_SHM) {
1060 struct shmid_ds sbuf;
1061
1062 shmdt(td->orig_buffer);
1063 shmctl(td->shm_id, IPC_RMID, &sbuf);
1064 }
2c83567e
JA
1065}
1066
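/*
 * allocate the io_u structures plus one page aligned buffer (malloc or
 * sysv shm) that the per-io_u buffers are carved out of
 */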
1067static int init_io_u(struct thread_data *td)
2c83567e
JA
1068{
1069 struct io_u *io_u;
99c6704f 1070 int i, max_units, mem_size;
6b71c826 1071 char *p;
2c83567e
JA
1072
1073 if (!td->use_aio)
1074 max_units = 1;
1075 else
1076 max_units = td->aio_depth;
1077
7889f07b 1078 mem_size = td->max_bs * max_units + MASK;
99c6704f
JA
1079
1080 if (td->mem_type == MEM_MALLOC)
1081 td->orig_buffer = malloc(mem_size);
1082 else if (td->mem_type == MEM_SHM) {
1083 td->shm_id = shmget(IPC_PRIVATE, mem_size, IPC_CREAT | 0600);
1084 if (td->shm_id < 0) {
1085 td->error = errno;
1086 perror("shmget");
1087 return 1;
1088 }
1089
1090 td->orig_buffer = shmat(td->shm_id, NULL, 0);
1091 if (td->orig_buffer == (void *) -1) {
1092 td->error = errno;
1093 perror("shmat");
1094 return 1;
1095 }
1096 }
6b71c826 1097
2c83567e
JA
1098 INIT_LIST_HEAD(&td->io_u_freelist);
1099 INIT_LIST_HEAD(&td->io_u_busylist);
645785e5 1100 INIT_LIST_HEAD(&td->io_hist_list);
2c83567e 1101
99c6704f 1102 p = ALIGN(td->orig_buffer);
2c83567e
JA
1103 for (i = 0; i < max_units; i++) {
1104 io_u = malloc(sizeof(*io_u));
1105 memset(io_u, 0, sizeof(*io_u));
1106 INIT_LIST_HEAD(&io_u->list);
1107
7889f07b 1108 io_u->buf = p + td->max_bs * i;
2c83567e
JA
1109 list_add(&io_u->list, &td->io_u_freelist);
1110 }
99c6704f
JA
1111
1112 return 0;
2c83567e
JA
1113}
1114
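/*
 * lay out the io file up front when needed (reads, or writes with
 * overwrite set), filling it with zeroed max_bs sized writes
 */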
1115static int create_file(struct thread_data *td)
1116{
7889f07b 1117 unsigned long long left;
645785e5 1118 unsigned int bs;
02983297 1119 char *b;
645785e5 1120 int r;
02983297 1121
02983297
JA
1122 /*
1123 * unless specifically asked for overwrite, let normal io extend it
1124 */
62bb4285 1125 if (td_write(td) && !td->overwrite)
02983297
JA
1126 return 0;
1127
57d753e3
JA
1128 if (!td->file_size) {
1129 fprintf(stderr, "Need size for create\n");
1130 td->error = EINVAL;
1131 return 1;
1132 }
1133
42fd89a7
JA
1134 printf("Client%d: Laying out IO file\n", td->thread_number);
1135
02983297
JA
1136 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1137 if (td->fd < 0) {
1138 td->error = errno;
1139 return 1;
1140 }
1141
c94deb1c
JA
1142 if (ftruncate(td->fd, td->file_size) == -1) {
1143 td->error = errno;
1144 return 1;
1145 }
1146
49d2caab 1147 td->io_size = td->file_size;
7889f07b
JA
1148 b = malloc(td->max_bs);
1149 memset(b, 0, td->max_bs);
1150
1151 left = td->file_size;
1152 while (left) {
1153 bs = td->max_bs;
1154 if (bs > left)
1155 bs = left;
02983297 1156
7889f07b 1157 r = write(td->fd, b, bs);
02983297 1158
645785e5 1159 if (r == (int) bs) {
7889f07b 1160 left -= bs;
02983297 1161 continue;
7889f07b 1162 } else {
02983297
JA
1163 if (r < 0)
1164 td->error = errno;
1165 else
1166 td->error = EIO;
1167
1168 break;
1169 }
1170 }
1171
fc097bfe
JA
1172 if (td->create_fsync)
1173 fsync(td->fd);
1174
02983297
JA
1175 close(td->fd);
1176 td->fd = -1;
1177 free(b);
1178 return 0;
1179}
1180
1181static int file_exists(struct thread_data *td)
1182{
1183 struct stat st;
1184
1185 if (stat(td->file_name, &st) != -1)
1186 return 1;
1187
1188 return errno != ENOENT;
1189}
1190
c4c8f7b3 1191static int file_size(struct thread_data *td)
02983297
JA
1192{
1193 struct stat st;
c94deb1c
JA
1194
1195 if (fstat(td->fd, &st) == -1) {
1196 td->error = errno;
1197 return 1;
1198 }
1199
c94deb1c 1200 if (td_read(td)) {
c4c8f7b3
JA
1201 if (!td->file_size || td->file_size > st.st_size)
1202 td->file_size = st.st_size;
c94deb1c
JA
1203 } else {
1204 if (!td->file_size)
1205 td->file_size = 1024 * 1024 * 1024;
c4c8f7b3
JA
1206 }
1207
1208 return 0;
1209}
1210
1211static int bdev_size(struct thread_data *td)
1212{
1213 size_t bytes;
c94deb1c 1214
c4c8f7b3
JA
1215 if (ioctl(td->fd, BLKGETSIZE64, &bytes) < 0) {
1216 td->error = errno;
1217 return 1;
c94deb1c
JA
1218 }
1219
c4c8f7b3
JA
1220 if (!td->file_size || (td->file_size > bytes))
1221 td->file_size = bytes;
1222
1223 return 0;
1224}
1225
1226static int get_file_size(struct thread_data *td)
1227{
1228 int ret;
1229
1230 if (td->filetype == FIO_TYPE_FILE)
1231 ret = file_size(td);
1232 else
1233 ret = bdev_size(td);
1234
1235 if (ret)
1236 return ret;
1237
1238 if (td->file_offset > td->file_size) {
c94deb1c
JA
1239 fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
1240 return 1;
1241 }
1242
c4c8f7b3 1243 td->io_size = td->file_size - td->file_offset;
c94deb1c
JA
1244 if (td->io_size == 0) {
1245 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
1246 td->error = EINVAL;
1247 return 1;
1248 }
1249
1250 return 0;
1251}
1252
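/*
 * open the target file or block device with the requested flags
 * (O_DIRECT, O_SYNC, etc), size it and optionally drop the page cache
 */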
1253static int setup_file(struct thread_data *td)
1254{
02983297
JA
1255 int flags = 0;
1256
1257 if (!file_exists(td)) {
1258 if (!td->create_file) {
1259 td->error = ENOENT;
1260 return 1;
1261 }
1262 if (create_file(td))
1263 return 1;
1264 }
1265
1266 if (td->odirect)
1267 flags |= O_DIRECT;
1268
1269 if (td_read(td))
1270 td->fd = open(td->file_name, flags | O_RDONLY);
1271 else {
1272 if (!td->overwrite)
1273 flags |= O_TRUNC;
74b4b5fb
JA
1274 if (td->sync_io)
1275 flags |= O_SYNC;
e8457004
JA
1276 if (td->verify)
1277 flags |= O_RDWR;
1278 else
1279 flags |= O_WRONLY;
02983297 1280
e8457004 1281 td->fd = open(td->file_name, flags | O_CREAT, 0600);
02983297
JA
1282 }
1283
1284 if (td->fd == -1) {
1285 td->error = errno;
1286 return 1;
1287 }
1288
c94deb1c 1289 if (get_file_size(td))
49d2caab 1290 return 1;
49d2caab 1291
62bb4285 1292 if (td_write(td) && ftruncate(td->fd, td->file_size) == -1) {
c94deb1c 1293 td->error = errno;
02983297
JA
1294 return 1;
1295 }
1296
b95799ca 1297 if (td->invalidate_cache) {
c94deb1c 1298 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
b95799ca
JA
1299 td->error = errno;
1300 return 1;
1301 }
1302 }
1303
02983297
JA
1304 return 0;
1305}
1306
d32d9284
JA
1307static void clear_io_state(struct thread_data *td)
1308{
9d0c6ca2
JA
1309 if (!td->use_aio)
1310 lseek(td->fd, SEEK_SET, 0);
1311
d32d9284 1312 td->cur_off = 0;
49d2caab
JA
1313 td->last_bytes = 0;
1314 td->stat_io_bytes = 0;
1315 td->this_io_bytes = 0;
1316
1317 if (td->file_map)
1318 memset(td->file_map, 0, td->num_maps * sizeof(long));
d32d9284
JA
1319}
1320
f6dcd824
JA
1321static void update_rusage_stat(struct thread_data *td)
1322{
1323 if (!td->runtime)
1324 return;
1325
1326 getrusage(RUSAGE_SELF, &td->ru_end);
1327
1328 td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
1329 td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
1330 td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
1331
1332
1333 memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
1334}
1335
189873de 1336static void *thread_main(void *data)
892199bd 1337{
189873de 1338 struct thread_data *td = data;
02983297 1339 int ret = 1;
892199bd 1340
7292613b 1341 setsid();
892199bd
JA
1342 td->pid = getpid();
1343
99c6704f
JA
1344 if (init_io_u(td))
1345 goto err;
2c83567e 1346
18e0b78c
JA
1347 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1348 td->error = errno;
1349 goto err;
1350 }
1351
43000118
JA
1352 if (td->use_aio && init_aio(td))
1353 goto err;
1354
f737299d 1355 if (td->ioprio) {
892199bd
JA
1356 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1357 td->error = errno;
599002b3 1358 goto err;
892199bd
JA
1359 }
1360 }
1361
1362 sem_post(&startup_sem);
1363 sem_wait(&td->mutex);
43000118 1364
fc097bfe
JA
1365 if (!td->create_serialize && setup_file(td))
1366 goto err;
1367
49d2caab
JA
1368 if (init_random_state(td))
1369 goto err;
1370
293753bb 1371 while (td->loops--) {
f6dcd824
JA
1372 getrusage(RUSAGE_SELF, &td->ru_start);
1373 gettimeofday(&td->start, NULL);
1374 memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
293753bb
JA
1375
1376 if (td->ratemin)
1377 memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
7292613b 1378
d32d9284 1379 clear_io_state(td);
9d0c6ca2 1380 prune_io_piece_log(td);
fd1ae4c9 1381
b2de0ed2 1382 if (!td->use_aio)
b6794fbf 1383 do_sync_io(td);
b2de0ed2
JA
1384 else
1385 do_async_io(td);
1386
91fc5dc9
JA
1387 if (td->error)
1388 break;
1389
f6dcd824
JA
1390 td->runtime += mtime_since_now(&td->start);
1391 update_rusage_stat(td);
1392
b2de0ed2
JA
1393 if (!td->verify)
1394 continue;
cfc702bd 1395
b2de0ed2 1396 clear_io_state(td);
d32d9284 1397
91fc5dc9
JA
1398 if (!td->use_aio)
1399 do_sync_verify(td);
1400 else
1401 do_async_verify(td);
1402
1403 if (td->error)
1404 break;
b6794fbf 1405 }
7292613b 1406
892199bd 1407 ret = 0;
a0a9b35b
JA
1408
1409 if (td->bw_log)
1410 finish_log(td, td->bw_log, "bw");
1411 if (td->lat_log)
1412 finish_log(td, td->lat_log, "lat");
4ac89145 1413
98dd52d6 1414 if (exitall_on_terminate)
27c32a38 1415 terminate_threads(td->groupid);
98dd52d6 1416
892199bd 1417err:
7292613b
JA
1418 if (td->fd != -1) {
1419 close(td->fd);
1420 td->fd = -1;
1421 }
4ac89145
JA
1422 if (td->use_aio)
1423 cleanup_aio(td);
2c83567e 1424 cleanup_io_u(td);
599002b3 1425 if (ret) {
892199bd 1426 sem_post(&startup_sem);
599002b3
JA
1427 sem_wait(&td->mutex);
1428 }
40ef7f64 1429 td_set_runstate(td, TD_EXITED);
189873de
JA
1430 return NULL;
1431
1432}
1433
1434static void *fork_main(int shm_id, int offset)
1435{
1436 struct thread_data *td;
1437 void *data;
1438
1439 data = shmat(shm_id, NULL, 0);
1440 if (data == (void *) -1) {
1441 perror("shmat");
1442 return NULL;
1443 }
1444
1445 td = data + offset * sizeof(struct thread_data);
1446 thread_main(td);
4240cfa1 1447 shmdt(data);
892199bd
JA
1448 return NULL;
1449}
1450
57d753e3
JA
1451static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1452 double *mean, double *dev)
1453{
1454 double n;
1455
1456 if (is->samples == 0)
1457 return 0;
1458
1459 *min = is->min_val;
1460 *max = is->max_val;
1461
1462 n = (double) is->samples;
1463 *mean = (double) is->val / n;
1464 *dev = sqrt(((double) is->val_sq - n * (*mean) * (*mean)) / (n - 1));
1465 return 1;
1466}
1467
557e4102
JA
1468static void show_thread_status(struct thread_data *td,
1469 struct group_run_stats *rs)
892199bd
JA
1470{
1471 int prio, prio_class;
f6dcd824 1472 unsigned long min, max, bw = 0;
92b229ed 1473 double mean, dev, usr_cpu, sys_cpu;
892199bd 1474
49d2caab 1475 if (!td->io_bytes && !td->error)
213b446c
JA
1476 return;
1477
892199bd 1478 if (td->runtime)
49d2caab 1479 bw = td->io_bytes / td->runtime;
892199bd
JA
1480
1481 prio = td->ioprio & 0xff;
1482 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1483
f6dcd824 1484 printf("Client%d (g=%d): err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->groupid, td->error, td->io_bytes >> 20, bw, td->runtime);
fd1ae4c9 1485
57d753e3
JA
1486 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1487 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1488 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1489 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
557e4102
JA
1490 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev)) {
1491 double p_of_agg;
1492
1493 p_of_agg = mean * 100 / (double) rs->agg[td->ddir];
1494 printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev);
1495 }
92b229ed
JA
1496
1497 if (td->runtime) {
f6dcd824
JA
1498 usr_cpu = (double) td->usr_time * 100 / (double) td->runtime;
1499 sys_cpu = (double) td->sys_time * 100 / (double) td->runtime;
92b229ed
JA
1500 } else {
1501 usr_cpu = 0;
1502 sys_cpu = 0;
1503 }
1504
f6dcd824 1505 printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
892199bd
JA
1506}
1507
3f39453a 1508static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 1509 int t_rate, int m_rate)
3f39453a 1510{
3f39453a
JA
1511 printf("Threads now running: %d", nr_running);
1512 if (m_rate || t_rate)
1513 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
1514 printf(" : [%s]\r", run_str);
1515 fflush(stdout);
3f39453a
JA
1516}
1517
40ef7f64
JA
1518static void check_str_update(struct thread_data *td, int n, int t, int m)
1519{
1520 char c = run_str[td->thread_number - 1];
1521
1522 if (td->runstate == td->old_runstate)
1523 return;
1524
1525 switch (td->runstate) {
1526 case TD_REAPED:
1527 c = '_';
1528 break;
f4bb2243
JA
1529 case TD_EXITED:
1530 c = 'E';
1531 break;
40ef7f64 1532 case TD_RUNNING:
af678352
JA
1533 if (td_read(td)) {
1534 if (td->sequential)
1535 c = 'R';
1536 else
1537 c = 'r';
1538 } else {
1539 if (td->sequential)
1540 c = 'W';
1541 else
1542 c = 'w';
1543 }
40ef7f64
JA
1544 break;
1545 case TD_VERIFYING:
1546 c = 'V';
1547 break;
1548 case TD_CREATED:
1549 c = 'C';
1550 break;
1551 case TD_NOT_CREATED:
1552 c = 'P';
1553 break;
1554 default:
1555 printf("state %d\n", td->runstate);
1556 }
1557
1558 run_str[td->thread_number - 1] = c;
1559 print_thread_status(td, n, t, m);
1560 td->old_runstate = td->runstate;
1561}
1562
213b446c 1563static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1564{
213b446c 1565 int i;
02bdd9ba 1566
3f39453a
JA
1567 /*
1568 * reap exited threads (TD_EXITED -> TD_REAPED)
1569 */
02bdd9ba
JA
1570 for (i = 0; i < thread_number; i++) {
1571 struct thread_data *td = &threads[i];
1572
40ef7f64
JA
1573 check_str_update(td, *nr_running, *t_rate, *m_rate);
1574
213b446c
JA
1575 if (td->runstate != TD_EXITED)
1576 continue;
02bdd9ba 1577
40ef7f64 1578 td_set_runstate(td, TD_REAPED);
189873de
JA
1579
1580 if (td->use_thread) {
1581 long ret;
1582
1583 if (pthread_join(td->thread, (void *) &ret))
1584 perror("thread_join");
1585 } else
1586 waitpid(td->pid, NULL, 0);
1587
213b446c
JA
1588 (*nr_running)--;
1589 (*m_rate) -= td->ratemin;
1590 (*t_rate) -= td->rate;
40ef7f64 1591 check_str_update(td, *nr_running, *t_rate, *m_rate);
213b446c 1592 }
02bdd9ba
JA
1593}
1594
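/*
 * main supervisor: optionally serialize file layout, honour start delays
 * and stonewalls, start each job via fork or pthread and reap them
 */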
1595static void run_threads(char *argv[])
1596{
be33abe4 1597 struct timeval genesis;
fc24389f
JA
1598 struct thread_data *td;
1599 unsigned long spent;
2a81240d 1600 int i, todo, nr_running, m_rate, t_rate, nr_started;
fc24389f 1601
fc24389f
JA
1602 printf("Starting %d threads\n", thread_number);
1603 fflush(stdout);
1604
7292613b
JA
1605 signal(SIGINT, sig_handler);
1606
fc24389f 1607 todo = thread_number;
02bdd9ba 1608 nr_running = 0;
2a81240d 1609 nr_started = 0;
213b446c 1610 m_rate = t_rate = 0;
fc24389f 1611
8bdcfab5
JA
1612 for (i = 0; i < thread_number; i++) {
1613 td = &threads[i];
1614
fc097bfe
JA
1615 if (!td->create_serialize)
1616 continue;
1617
8bdcfab5
JA
1618 /*
1619 * do file setup here so it happens sequentially,
1620 * we don't want X number of threads getting their
1621 * client data interspersed on disk
1622 */
1623 if (setup_file(td)) {
40ef7f64 1624 td_set_runstate(td, TD_REAPED);
8bdcfab5
JA
1625 todo--;
1626 }
1627 }
1628
1629 gettimeofday(&genesis, NULL);
1630
213b446c 1631 while (todo) {
3f39453a
JA
1632 /*
1633 * create threads (TD_NOT_CREATED -> TD_CREATED)
1634 */
fc24389f
JA
1635 for (i = 0; i < thread_number; i++) {
1636 td = &threads[i];
1637
02bdd9ba 1638 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1639 continue;
1640
213b446c
JA
1641 /*
1642 * never got a chance to start, killed by other
1643 * thread for some reason
1644 */
1645 if (td->terminate) {
1646 todo--;
1647 continue;
1648 }
1649
fc24389f 1650 if (td->start_delay) {
be33abe4 1651 spent = mtime_since_now(&genesis);
fc24389f
JA
1652
1653 if (td->start_delay * 1000 > spent)
1654 continue;
1655 }
1656
2a81240d 1657 if (td->stonewall && (nr_started || nr_running))
ea6f96a2 1658 break;
2a81240d 1659
40ef7f64
JA
1660 td_set_runstate(td, TD_CREATED);
1661 check_str_update(td, nr_running, t_rate, m_rate);
fc24389f
JA
1662 sem_init(&startup_sem, 1, 1);
1663 todo--;
2a81240d 1664 nr_started++;
fc24389f 1665
189873de
JA
1666 if (td->use_thread) {
1667 if (pthread_create(&td->thread, NULL, thread_main, td)) {
1668 perror("thread_create");
1669 nr_started--;
1670 }
1671 } else {
1672 if (fork())
1673 sem_wait(&startup_sem);
1674 else {
1675 fork_main(shm_id, i);
1676 exit(0);
1677 }
fc24389f
JA
1678 }
1679 }
1680
3f39453a 1681 /*
e8457004 1682 * start created threads (TD_CREATED -> TD_RUNNING)
3f39453a 1683 */
fc24389f
JA
1684 for (i = 0; i < thread_number; i++) {
1685 struct thread_data *td = &threads[i];
1686
3f39453a
JA
1687 if (td->runstate != TD_CREATED)
1688 continue;
1689
40ef7f64 1690 td_set_runstate(td, TD_RUNNING);
3f39453a 1691 nr_running++;
2a81240d 1692 nr_started--;
3f39453a
JA
1693 m_rate += td->ratemin;
1694 t_rate += td->rate;
40ef7f64 1695 check_str_update(td, nr_running, t_rate, m_rate);
3f39453a 1696 sem_post(&td->mutex);
fc24389f
JA
1697 }
1698
e8457004
JA
1699 for (i = 0; i < thread_number; i++) {
1700 struct thread_data *td = &threads[i];
1701
b48889bb
JA
1702 if (td->runstate != TD_RUNNING &&
1703 td->runstate != TD_VERIFYING)
e8457004
JA
1704 continue;
1705
40ef7f64 1706 check_str_update(td, nr_running, t_rate, m_rate);
e8457004
JA
1707 }
1708
213b446c 1709 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1710
fc24389f
JA
1711 if (todo)
1712 usleep(100000);
1713 }
02bdd9ba
JA
1714
1715 while (nr_running) {
213b446c 1716 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1717 usleep(10000);
1718 }
fc24389f
JA
1719}
1720
0d80f40d 1721static void show_group_stats(struct group_run_stats *rs, int id)
8867c0a8 1722{
0d80f40d
JA
1723 printf("\nRun status group %d:\n", id);
1724
1725 if (rs->max_run[DDIR_READ])
1726 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[0], rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]);
1727 if (rs->max_run[DDIR_WRITE])
1728 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[1], rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]);
1729}
1730
1731static void show_run_stats(void)
1732{
1733 struct group_run_stats *runstats, *rs;
557e4102 1734 struct thread_data *td;
8867c0a8
JA
1735 int i;
1736
0d80f40d
JA
1737 runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
1738
1739 for (i = 0; i < groupid + 1; i++) {
1740 rs = &runstats[i];
1741
f6dcd824 1742 memset(rs, 0, sizeof(*rs));
0d80f40d
JA
1743 rs->min_bw[0] = rs->min_run[0] = ~0UL;
1744 rs->min_bw[1] = rs->min_run[1] = ~0UL;
0d80f40d
JA
1745 }
1746
1747 for (i = 0; i < thread_number; i++) {
0d80f40d
JA
1748 unsigned long bw = 0;
1749
557e4102
JA
1750 td = &threads[i];
1751
1752 if (td->error)
1753 continue;
1754
0d80f40d
JA
1755 rs = &runstats[td->groupid];
1756
557e4102
JA
1757 if (td->runtime < rs->min_run[td->ddir])
1758 rs->min_run[td->ddir] = td->runtime;
1759 if (td->runtime > rs->max_run[td->ddir])
1760 rs->max_run[td->ddir] = td->runtime;
0d80f40d 1761
557e4102
JA
1762 if (td->runtime)
1763 bw = td->io_bytes / td->runtime;
1764 if (bw < rs->min_bw[td->ddir])
1765 rs->min_bw[td->ddir] = bw;
1766 if (bw > rs->max_bw[td->ddir])
1767 rs->max_bw[td->ddir] = bw;
0d80f40d 1768
557e4102 1769 rs->io_mb[td->ddir] += td->io_bytes >> 20;
0d80f40d 1770 }
9d489c62 1771
0d80f40d
JA
1772 for (i = 0; i < groupid + 1; i++) {
1773 rs = &runstats[i];
1774
1775 if (rs->max_run[0])
1776 rs->agg[0] = (rs->io_mb[0]*1024*1000) / rs->max_run[0];
1777 if (rs->max_run[1])
1778 rs->agg[1] = (rs->io_mb[1]*1024*1000) / rs->max_run[1];
0d80f40d 1779 }
557e4102
JA
1780
1781 for (i = 0; i < thread_number; i++) {
1782 td = &threads[i];
1783 rs = &runstats[td->groupid];
1784
c4c8f7b3 1785 show_thread_status(td, rs);
557e4102 1786 }
9d489c62
JA
1787
1788 for (i = 0; i < groupid + 1; i++)
1789 show_group_stats(&runstats[i], i);
0d80f40d
JA
1790}
1791
1792int main(int argc, char *argv[])
1793{
27c32a38 1794 memset(run_str, 0, sizeof(run_str));
5961d92c 1795
27c32a38 1796 if (parse_options(argc, argv))
5961d92c 1797 return 1;
7dd1389e 1798
4240cfa1
JA
1799 if (!thread_number) {
1800 printf("Nothing to do\n");
1801 return 1;
1802 }
7dd1389e 1803
fc24389f 1804 run_threads(argv);
0d80f40d 1805 show_run_stats();
fc24389f 1806
892199bd
JA
1807 return 0;
1808}