[PATCH] fio: if no size given for read, just use file size
[disktools.git] / fio.c
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
29#include <math.h>
30#include <assert.h>
31#include <pthread.h>
32#include <sys/types.h>
33#include <sys/stat.h>
34#include <sys/wait.h>
35#include <sys/ipc.h>
36#include <sys/shm.h>
37#include <sys/ioctl.h>
38#include <asm/unistd.h>
39
40#include "fio.h"
41
42#define MASK (4095)
43
44#define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
45
46int groupid = 0;
47int thread_number = 0;
48char run_str[MAX_JOBS + 1];
49int shm_id = 0;
50
51/*
52 * thread life cycle
53 */
54enum {
55 TD_NOT_CREATED = 0,
56 TD_CREATED,
57 TD_RUNNING,
58 TD_VERIFYING,
59 TD_EXITED,
60 TD_REAPED,
61};
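/*
 * Typical life cycle, as driven by run_threads()/reap_threads() below:
 * TD_NOT_CREATED -> TD_CREATED -> TD_RUNNING (-> TD_VERIFYING) ->
 * TD_EXITED -> TD_REAPED.
 */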
62
2c83567e
JA
63/*
64 * The io unit
65 */
66struct io_u {
67 struct iocb iocb;
57d753e3 68 struct timeval start_time;
2c83567e
JA
69 struct timeval issue_time;
70
2c83567e
JA
71 char *buf;
72 unsigned int buflen;
4ac89145 73 unsigned long long offset;
2c83567e
JA
74
75 struct list_head list;
76};
77
62bb4285 78#define should_fsync(td) (td_write(td) && !(td)->odirect)
02983297 79
892199bd
JA
80static sem_t startup_sem;
81
27c32a38
JA
82#define TERMINATE_ALL (-1)
83
84static void terminate_threads(int groupid)
892199bd
JA
85{
86 int i;
87
213b446c
JA
88 for (i = 0; i < thread_number; i++) {
89 struct thread_data *td = &threads[i];
90
27c32a38
JA
91 if (groupid == TERMINATE_ALL || groupid == td->groupid) {
92 td->terminate = 1;
93 td->start_delay = 0;
94 }
213b446c 95 }
02bdd9ba
JA
96}
97
27c32a38 98static void sig_handler(int sig)
946d8870 99{
27c32a38 100 terminate_threads(TERMINATE_ALL);
892199bd
JA
101}
102
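/*
 * Timing helpers: utime_since()/mtime_since() return the distance
 * between two timevals in microseconds and milliseconds respectively;
 * the *_now() variants measure against the current time.
 */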
5c24b2c4 103static unsigned long utime_since(struct timeval *s, struct timeval *e)
892199bd
JA
104{
105 double sec, usec;
106
107 sec = e->tv_sec - s->tv_sec;
108 usec = e->tv_usec - s->tv_usec;
109 if (sec > 0 && usec < 0) {
110 sec--;
111 usec += 1000000;
112 }
113
114 sec *= (double) 1000000;
115
116 return sec + usec;
117}
118
fd11d7af
JA
119static unsigned long utime_since_now(struct timeval *s)
120{
121 struct timeval t;
122
123 gettimeofday(&t, NULL);
124 return utime_since(s, &t);
125}
126
5c24b2c4 127static unsigned long mtime_since(struct timeval *s, struct timeval *e)
892199bd
JA
128{
129 double sec, usec;
130
131 sec = e->tv_sec - s->tv_sec;
132 usec = e->tv_usec - s->tv_usec;
133 if (sec > 0 && usec < 0) {
134 sec--;
135 usec += 1000000;
136 }
137
138 sec *= (double) 1000;
139 usec /= (double) 1000;
140
141 return sec + usec;
142}
143
be33abe4
JA
144static unsigned long mtime_since_now(struct timeval *s)
145{
146 struct timeval t;
147
148 gettimeofday(&t, NULL);
149 return mtime_since(s, &t);
150}
151
98168d55
JA
152static inline unsigned long msec_now(struct timeval *s)
153{
154 return s->tv_sec * 1000 + s->tv_usec / 1000;
155}
156
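/*
 * file_map tracks which blocks have been issued, one bit per min_bs
 * sized block. Random io consults it to avoid hitting the same block
 * twice, falling back to get_next_free_block() after too many
 * collisions.
 */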
157static int random_map_free(struct thread_data *td, unsigned long long block)
158{
75b2ab2c
JA
159 unsigned int idx = RAND_MAP_IDX(td, block);
160 unsigned int bit = RAND_MAP_BIT(td, block);
49d2caab
JA
161
162 return (td->file_map[idx] & (1UL << bit)) == 0;
163}
164
165static int get_next_free_block(struct thread_data *td, unsigned long long *b)
892199bd 166{
49d2caab
JA
167 int i;
168
169 *b = 0;
170 i = 0;
171 while ((*b) * td->min_bs < td->io_size) {
172 if (td->file_map[i] != -1UL) {
173 *b += ffz(td->file_map[i]);
174 return 0;
175 }
176
177 *b += BLOCKS_PER_MAP;
178 i++;
179 }
180
181 return 1;
182}
183
184static void mark_random_map(struct thread_data *td, struct io_u *io_u)
185{
186 unsigned long block = io_u->offset / td->min_bs;
187 unsigned int blocks = 0;
188
189 while (blocks < (io_u->buflen / td->min_bs)) {
190 int idx, bit;
191
192 if (!random_map_free(td, block))
193 break;
194
75b2ab2c
JA
195 idx = RAND_MAP_IDX(td, block);
196 bit = RAND_MAP_BIT(td, block);
49d2caab
JA
197
198 assert(idx < td->num_maps);
199
200 td->file_map[idx] |= (1UL << bit);
201 block++;
202 blocks++;
203 }
204
205 if ((blocks * td->min_bs) < io_u->buflen)
206 io_u->buflen = blocks * td->min_bs;
207}
208
209static int get_next_offset(struct thread_data *td, unsigned long long *offset)
210{
75b2ab2c 211 unsigned long long b, rb;
d32d9284 212 long r;
892199bd
JA
213
214 if (!td->sequential) {
49d2caab
JA
215 unsigned long max_blocks = td->io_size / td->min_bs;
216 int loops = 50;
217
218 do {
219 lrand48_r(&td->random_state, &r);
220 b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
75b2ab2c 221 rb = b + (td->file_offset / td->min_bs);
49d2caab 222 loops--;
75b2ab2c 223 } while (!random_map_free(td, rb) && loops);
49d2caab
JA
224
225 if (!loops) {
226 if (get_next_free_block(td, &b))
227 return 1;
228 }
7889f07b 229 } else
49d2caab 230 b = td->last_bytes / td->min_bs;
7889f07b 231
49d2caab 232 *offset = (b * td->min_bs) + td->file_offset;
75b2ab2c
JA
233 if (*offset > td->file_size)
234 return 1;
235
49d2caab 236 return 0;
7889f07b
JA
237}
238
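/*
 * Pick the next buffer length: fixed when min_bs == max_bs, otherwise
 * a random size rounded up to a multiple of min_bs (the mask below
 * assumes min_bs is a power of two), clamped to the remaining io_size.
 */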
239static unsigned int get_next_buflen(struct thread_data *td)
240{
241 unsigned int buflen;
d32d9284 242 long r;
7889f07b
JA
243
244 if (td->min_bs == td->max_bs)
245 buflen = td->min_bs;
246 else {
d32d9284 247 lrand48_r(&td->bsrange_state, &r);
7889f07b
JA
248 buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
249 buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
892199bd
JA
250 }
251
49d2caab
JA
252 if (buflen > td->io_size - td->this_io_bytes)
253 buflen = td->io_size - td->this_io_bytes;
7889f07b 254
7889f07b 255 return buflen;
892199bd
JA
256}
257
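/*
 * Per-sample statistics: track min/max plus the running sum and sum of
 * squares, from which calc_lat() later derives the mean and standard
 * deviation.
 */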
258static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
259 unsigned long val)
892199bd 260{
57d753e3
JA
261 if (val > is->max_val)
262 is->max_val = val;
263 if (val < is->min_val)
264 is->min_val = val;
265
266 is->val += val;
267 is->val_sq += val * val;
268 is->samples++;
269}
fd1ae4c9 270
a0a9b35b
JA
271static void add_log_sample(struct thread_data *td, struct io_log *log,
272 unsigned long val)
273{
274 if (log->nr_samples == log->max_samples) {
275 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
276
277 log->log = realloc(log->log, new_size);
278 log->max_samples <<= 1;
279 }
280
281 log->log[log->nr_samples].val = val;
282 log->log[log->nr_samples].time = mtime_since_now(&td->start);
283 log->nr_samples++;
284}
285
57d753e3
JA
286static void add_clat_sample(struct thread_data *td, unsigned long msec)
287{
288 add_stat_sample(td, &td->clat_stat, msec);
a0a9b35b
JA
289
290 if (td->lat_log)
291 add_log_sample(td, td->lat_log, msec);
57d753e3 292}
fd1ae4c9 293
57d753e3
JA
294static void add_slat_sample(struct thread_data *td, unsigned long msec)
295{
296 add_stat_sample(td, &td->slat_stat, msec);
297}
fd1ae4c9 298
645785e5 299static void add_bw_sample(struct thread_data *td)
57d753e3
JA
300{
301 unsigned long spent = mtime_since_now(&td->stat_sample_time);
302 unsigned long rate;
303
1d035750 304 if (spent < td->bw_avg_time)
57d753e3
JA
305 return;
306
49d2caab 307 rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
57d753e3
JA
308 add_stat_sample(td, &td->bw_stat, rate);
309
a0a9b35b
JA
310 if (td->bw_log)
311 add_log_sample(td, td->bw_log, rate);
312
57d753e3 313 gettimeofday(&td->stat_sample_time, NULL);
49d2caab 314 td->stat_io_bytes = td->this_io_bytes;
892199bd
JA
315}
316
fd11d7af
JA
317/*
318 * busy looping version for the last few usec
319 */
320static void __usec_sleep(int usec)
321{
322 struct timeval start;
323
324 gettimeofday(&start, NULL);
325 while (utime_since_now(&start) < usec)
3782a8cd 326 nop;
fd11d7af
JA
327}
328
5c24b2c4 329static void usec_sleep(int usec)
892199bd 330{
86184d14
JA
331 struct timespec req = { .tv_sec = 0, .tv_nsec = usec * 1000 };
332 struct timespec rem;
892199bd
JA
333
334 do {
fd11d7af
JA
335 if (usec < 5000) {
336 __usec_sleep(usec);
337 break;
338 }
86184d14
JA
339 rem.tv_sec = rem.tv_nsec = 0;
340 nanosleep(&req, &rem);
341 if (!rem.tv_nsec)
342 break;
343
344 req.tv_nsec = rem.tv_nsec;
345 usec = rem.tv_nsec / 1000;
892199bd
JA
346 } while (1);
347}
348
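/*
 * Rate throttling: rate_usec_cycle appears to be the target time per
 * min_bs block at the requested rate. If this io completed faster,
 * accumulate the difference and sleep once the pending debt exceeds
 * 100 msec; if it was slower, pay the debt down instead.
 */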
349static void rate_throttle(struct thread_data *td, unsigned long time_spent,
350 unsigned int bytes)
86184d14 351{
9e850933
JA
352 unsigned long usec_cycle;
353
4240cfa1
JA
354 if (!td->rate)
355 return;
356
9e850933
JA
357 usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
358
359 if (time_spent < usec_cycle) {
360 unsigned long s = usec_cycle - time_spent;
86184d14
JA
361
362 td->rate_pending_usleep += s;
fad86e6a 363 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
364 usec_sleep(td->rate_pending_usleep);
365 td->rate_pending_usleep = 0;
366 }
4240cfa1 367 } else {
9e850933 368 long overtime = time_spent - usec_cycle;
42b2b9fe 369
4240cfa1
JA
370 td->rate_pending_usleep -= overtime;
371 }
372}
373
5c24b2c4 374static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 375{
7607bc6b 376 unsigned long spent;
4240cfa1
JA
377 unsigned long rate;
378
379 /*
380 * allow a 2 second settle period in the beginning
381 */
7607bc6b 382 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
383 return 0;
384
385 /*
386 * if rate_bytes is set, a sampling window is already running
387 */
49d2caab 388 if (td->rate_bytes) {
4240cfa1
JA
389 spent = mtime_since(&td->lastrate, now);
390 if (spent < td->ratecycle)
391 return 0;
392
49d2caab 393 rate = (td->this_io_bytes - td->rate_bytes) / spent;
4240cfa1
JA
394 if (rate < td->ratemin) {
395 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 396 if (rate_quit)
27c32a38 397 terminate_threads(td->groupid);
4240cfa1
JA
398 return 1;
399 }
86184d14 400 }
4240cfa1 401
49d2caab 402 td->rate_bytes = td->this_io_bytes;
4240cfa1
JA
403 memcpy(&td->lastrate, now, sizeof(*now));
404 return 0;
86184d14
JA
405}
406
67903a2e
JA
407static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
408{
01f79976
JA
409 if (!td->timeout)
410 return 0;
67903a2e
JA
411 if (mtime_since(&td->start, t) >= td->timeout * 1000)
412 return 1;
413
414 return 0;
415}
416
e8457004
JA
417static void fill_random_bytes(struct thread_data *td,
418 unsigned char *p, unsigned int len)
419{
645785e5 420 unsigned int todo;
40ef7f64 421 double r;
e8457004
JA
422
423 while (len) {
40ef7f64 424 drand48_r(&td->verify_state, &r);
e8457004 425
40ef7f64
JA
426 /*
427 * lrand48_r seems to be broken and only fills the bottom
428 * 32 bits, even on 64-bit archs with 64-bit longs
429 */
430 todo = sizeof(r);
e8457004
JA
431 if (todo > len)
432 todo = len;
433
434 memcpy(p, &r, todo);
435
436 len -= todo;
437 p += todo;
438 }
439}
440
9d0c6ca2
JA
441static void hexdump(void *buffer, int len)
442{
443 unsigned char *p = buffer;
444 int i;
445
446 for (i = 0; i < len; i++)
447 printf("%02x", p[i]);
448 printf("\n");
449}
450
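/*
 * Check a read-back block against the header populate_io_u() wrote:
 * verify the magic and compare the stored md5 against an md5 of the
 * payload, dumping both digests on mismatch.
 */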
645785e5 451static int verify_io_u(struct io_u *io_u)
e8457004
JA
452{
453 struct verify_header *hdr = (struct verify_header *) io_u->buf;
454 unsigned char *p = (unsigned char *) io_u->buf;
455 struct md5_ctx md5_ctx;
9d0c6ca2 456 int ret;
e8457004 457
840b216f 458 if (hdr->fio_magic != FIO_HDR_MAGIC)
e8457004
JA
459 return 1;
460
461 memset(&md5_ctx, 0, sizeof(md5_ctx));
462 p += sizeof(*hdr);
463 md5_update(&md5_ctx, p, hdr->len - sizeof(*hdr));
464
9d0c6ca2
JA
465 ret = memcmp(hdr->md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
466 if (ret) {
467 hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
468 hexdump(md5_ctx.hash, sizeof(md5_ctx.hash));
469 }
470
471 return ret;
e8457004
JA
472}
473
cfc702bd
JA
474/*
475 * fill body of io_u->buf with random data and add a header with the
476 * md5 sum of that data.
477 */
e8457004 478static void populate_io_u(struct thread_data *td, struct io_u *io_u)
cfc702bd 479{
e8457004
JA
480 struct md5_ctx md5_ctx;
481 struct verify_header hdr;
482 unsigned char *p = (unsigned char *) io_u->buf;
483
484 hdr.fio_magic = FIO_HDR_MAGIC;
485 hdr.len = io_u->buflen;
486 p += sizeof(hdr);
487 fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
488
489 memset(&md5_ctx, 0, sizeof(md5_ctx));
490 md5_update(&md5_ctx, p, io_u->buflen - sizeof(hdr));
491 memcpy(hdr.md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
492 memcpy(io_u->buf, &hdr, sizeof(hdr));
cfc702bd
JA
493}
494
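/*
 * io_u units cycle between io_u_freelist and io_u_busylist; cur_depth
 * counts how many are currently in flight.
 */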
495static void put_io_u(struct thread_data *td, struct io_u *io_u)
496{
497 list_del(&io_u->list);
498 list_add(&io_u->list, &td->io_u_freelist);
499 td->cur_depth--;
500}
501
f0f3411b
JA
502#define queue_full(td) (list_empty(&(td)->io_u_freelist))
503
e8457004
JA
504static struct io_u *__get_io_u(struct thread_data *td)
505{
506 struct io_u *io_u;
507
f0f3411b 508 if (queue_full(td))
e8457004
JA
509 return NULL;
510
511 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
512 list_del(&io_u->list);
513 list_add(&io_u->list, &td->io_u_busylist);
f4bb2243 514 td->cur_depth++;
e8457004
JA
515 return io_u;
516}
517
2c83567e
JA
518static struct io_u *get_io_u(struct thread_data *td)
519{
520 struct io_u *io_u;
521
e8457004
JA
522 io_u = __get_io_u(td);
523 if (!io_u)
2c83567e
JA
524 return NULL;
525
406e7b7c
JA
526 if (get_next_offset(td, &io_u->offset)) {
527 put_io_u(td, io_u);
49d2caab 528 return NULL;
406e7b7c 529 }
49d2caab 530
b2a369fb
JA
531 io_u->buflen = get_next_buflen(td);
532 if (!io_u->buflen) {
e8457004 533 put_io_u(td, io_u);
7889f07b 534 return NULL;
e8457004 535 }
2c83567e 536
75b2ab2c
JA
537 if (io_u->buflen + io_u->offset > td->file_size)
538 io_u->buflen = td->file_size - io_u->offset;
49d2caab
JA
539
540 if (!td->sequential)
541 mark_random_map(td, io_u);
542
543 td->last_bytes += io_u->buflen;
544
9d0c6ca2 545 if (td->verify)
e8457004 546 populate_io_u(td, io_u);
cfc702bd 547
2c83567e
JA
548 if (td->use_aio) {
549 if (td_read(td))
550 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
551 else
552 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
553 }
554
57d753e3 555 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
556 return io_u;
557}
558
40ef7f64
JA
559static inline void td_set_runstate(struct thread_data *td, int runstate)
560{
561 td->old_runstate = td->runstate;
562 td->runstate = runstate;
563}
564
645785e5
JA
565static int get_next_verify(struct thread_data *td,
566 unsigned long long *offset, unsigned int *len)
567{
568 struct io_piece *ipo;
569
570 if (list_empty(&td->io_hist_list))
571 return 1;
572
573 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
574 list_del(&ipo->list);
575
576 *offset = ipo->offset;
577 *len = ipo->len;
578 free(ipo);
579 return 0;
580}
581
9d0c6ca2
JA
582static void prune_io_piece_log(struct thread_data *td)
583{
584 struct io_piece *ipo;
585
586 while (!list_empty(&td->io_hist_list)) {
587 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
588
589 list_del(&ipo->list);
590 free(ipo);
591 }
592}
593
9d0c6ca2
JA
594/*
595 * log a successful write, so we can unwind the log for verify
596 */
597static void log_io_piece(struct thread_data *td, struct io_u *io_u)
598{
49d2caab 599 struct io_piece *ipo = malloc(sizeof(struct io_piece));
9d0c6ca2
JA
600 struct list_head *entry;
601
602 INIT_LIST_HEAD(&ipo->list);
603 ipo->offset = io_u->offset;
604 ipo->len = io_u->buflen;
605
49d2caab
JA
606 /*
607 * for random io where the writes extend the file, it will typically
608 * be laid out with the blocks scattered as written. it's faster to
609 * read them back in that order again, so don't sort
610 */
611 if (td->sequential || !td->overwrite) {
9d0c6ca2
JA
612 list_add_tail(&ipo->list, &td->io_hist_list);
613 return;
614 }
615
616 /*
617 * for random io, sort the list so verify will run faster
618 */
619 entry = &td->io_hist_list;
620 while ((entry = entry->prev) != &td->io_hist_list) {
621 struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
622
9d0c6ca2
JA
623 if (__ipo->offset < ipo->offset)
624 break;
625 }
626
627 list_add(&ipo->list, entry);
628}
629
91fc5dc9 630static void do_sync_verify(struct thread_data *td)
cfc702bd 631{
40ef7f64 632 struct timeval t;
e8457004 633 struct io_u *io_u = NULL;
645785e5 634 int ret;
e8457004 635
40ef7f64 636 td_set_runstate(td, TD_VERIFYING);
e8457004
JA
637
638 io_u = __get_io_u(td);
639
40ef7f64 640 if (!td->odirect) {
49d2caab 641 if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
40ef7f64
JA
642 td->error = errno;
643 goto out;
644 }
645 }
646
e8457004
JA
647 do {
648 if (td->terminate)
649 break;
40ef7f64
JA
650
651 gettimeofday(&t, NULL);
652 if (runtime_exceeded(td, &t))
653 break;
654
645785e5
JA
655 if (get_next_verify(td, &io_u->offset, &io_u->buflen))
656 break;
657
658 if (td->cur_off != io_u->offset) {
659 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
660 td->error = errno;
661 break;
662 }
663 }
e8457004
JA
664
665 ret = read(td->fd, io_u->buf, io_u->buflen);
666 if (ret < (int) io_u->buflen) {
667 if (ret == -1) {
668 td->error = errno;
669 break;
670 } else if (!ret)
671 break;
672 else
673 io_u->buflen = ret;
674 }
675
645785e5 676 if (verify_io_u(io_u))
e8457004
JA
677 break;
678
645785e5 679 td->cur_off = io_u->offset + io_u->buflen;
e8457004
JA
680 } while (1);
681
682out:
40ef7f64 683 td_set_runstate(td, TD_RUNNING);
e8457004 684 put_io_u(td, io_u);
cfc702bd
JA
685}
686
43000118 687static void do_sync_io(struct thread_data *td)
892199bd 688{
7889f07b 689 unsigned long msec, usec;
e8457004 690 struct io_u *io_u = NULL;
2c83567e 691 struct timeval e;
892199bd 692
49d2caab 693 while (td->this_io_bytes < td->io_size) {
892199bd
JA
694 int ret;
695
696 if (td->terminate)
697 break;
698
2c83567e 699 io_u = get_io_u(td);
7889f07b
JA
700 if (!io_u)
701 break;
2c83567e 702
63a09e51
JA
703 if (td->cur_off != io_u->offset) {
704 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
705 td->error = errno;
706 break;
707 }
892199bd
JA
708 }
709
02983297 710 if (td_read(td))
2c83567e 711 ret = read(td->fd, io_u->buf, io_u->buflen);
892199bd 712 else
2c83567e 713 ret = write(td->fd, io_u->buf, io_u->buflen);
892199bd 714
2c83567e 715 if (ret < (int) io_u->buflen) {
892199bd
JA
716 if (ret == -1)
717 td->error = errno;
718 break;
719 }
720
62bb4285 721 if (td_write(td))
645785e5
JA
722 log_io_piece(td, io_u);
723
4240cfa1 724 td->io_blocks++;
49d2caab
JA
725 td->io_bytes += io_u->buflen;
726 td->this_io_bytes += io_u->buflen;
63a09e51 727 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1 728
86184d14
JA
729 gettimeofday(&e, NULL);
730
57d753e3 731 usec = utime_since(&io_u->start_time, &e);
86184d14 732
9e850933 733 rate_throttle(td, usec, io_u->buflen);
892199bd 734
4240cfa1
JA
735 if (check_min_rate(td, &e)) {
736 td->error = ENODATA;
737 break;
738 }
892199bd 739
4240cfa1 740 msec = usec / 1000;
57d753e3 741 add_clat_sample(td, msec);
645785e5 742 add_bw_sample(td);
67903a2e
JA
743
744 if (runtime_exceeded(td, &e))
745 break;
2c83567e 746
cdf92433 747 put_io_u(td, io_u);
e8457004 748 io_u = NULL;
cdf92433 749
e97712ed
JA
750 if (td->thinktime)
751 usec_sleep(td->thinktime);
752
cdf92433
JA
753 if (should_fsync(td) && td->fsync_blocks &&
754 (td->io_blocks % td->fsync_blocks) == 0)
755 fsync(td->fd);
892199bd
JA
756 }
757
e8457004
JA
758 if (io_u)
759 put_io_u(td, io_u);
760
4240cfa1 761 if (should_fsync(td))
892199bd 762 fsync(td->fd);
892199bd 763}
43000118 764
1ad72b11
JA
765static int io_u_getevents(struct thread_data *td, int min, int max,
766 struct timespec *t)
767{
768 int r;
769
770 do {
771 r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
772 if (r != -EAGAIN && r != -EINTR)
773 break;
774 } while (1);
775
776 return r;
777}
778
2c83567e 779static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 780{
2c83567e 781 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
782 int ret;
783
784 do {
254605cd 785 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
786 if (ret == 1)
787 return 0;
406e7b7c 788 else if (ret == -EAGAIN)
56b0eff0 789 usleep(100);
406e7b7c 790 else if (ret == -EINTR)
a592bd33 791 continue;
56b0eff0
JA
792 else
793 break;
794 } while (1);
795
a592bd33 796 return ret;
56b0eff0
JA
797}
798
98168d55 799#define iocb_time(iocb) ((unsigned long) (iocb)->data)
2c83567e
JA
800#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
801
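/*
 * Reap 'nr' completed aio events: account the completed blocks and
 * bytes, record completion latency and bandwidth samples, log written
 * pieces for later verification, and return how many bytes finished.
 */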
f0f3411b 802static int ios_completed(struct thread_data *td, int nr)
2c83567e
JA
803{
804 unsigned long msec;
805 struct io_u *io_u;
806 struct timeval e;
9e850933 807 int i, bytes_done;
2c83567e 808
f0f3411b 809 gettimeofday(&e, NULL);
2c83567e 810
9e850933 811 for (i = 0, bytes_done = 0; i < nr; i++) {
2c83567e
JA
812 io_u = ev_to_iou(td->aio_events + i);
813
f0f3411b 814 td->io_blocks++;
49d2caab
JA
815 td->io_bytes += io_u->buflen;
816 td->this_io_bytes += io_u->buflen;
8c033f93 817
f0f3411b 818 msec = mtime_since(&io_u->issue_time, &e);
2c83567e 819
f0f3411b 820 add_clat_sample(td, msec);
645785e5
JA
821 add_bw_sample(td);
822
62bb4285 823 if (td_write(td))
645785e5 824 log_io_piece(td, io_u);
2c83567e 825
f4bb2243 826 bytes_done += io_u->buflen;
2c83567e
JA
827 put_io_u(td, io_u);
828 }
9e850933
JA
829
830 return bytes_done;
2c83567e
JA
831}
832
833static void cleanup_pending_aio(struct thread_data *td)
834{
835 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
836 struct list_head *entry, *n;
837 struct io_u *io_u;
838 int r;
839
840 /*
841 * get immediately available events, if any
842 */
1ad72b11 843 r = io_u_getevents(td, 0, td->cur_depth, &ts);
2c83567e 844 if (r > 0)
f0f3411b 845 ios_completed(td, r);
2c83567e
JA
846
847 /*
848 * now cancel remaining active events
849 */
850 list_for_each_safe(entry, n, &td->io_u_busylist) {
851 io_u = list_entry(entry, struct io_u, list);
852
853 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
854 if (!r)
855 put_io_u(td, io_u);
856 }
857
858 if (td->cur_depth) {
1ad72b11 859 r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
2c83567e 860 if (r > 0)
f0f3411b 861 ios_completed(td, r);
2c83567e
JA
862 }
863}
98168d55 864
d32d9284
JA
865static int async_do_verify(struct thread_data *td, struct io_u **io_u)
866{
867 struct io_u *v_io_u = *io_u;
868 int ret = 0;
869
870 if (v_io_u) {
645785e5 871 ret = verify_io_u(v_io_u);
d32d9284
JA
872 put_io_u(td, v_io_u);
873 *io_u = NULL;
874 }
875
876 return ret;
877}
878
91fc5dc9 879static void do_async_verify(struct thread_data *td)
cfc702bd 880{
f4bb2243 881 struct timeval t;
d32d9284 882 struct io_u *io_u, *v_io_u = NULL;
645785e5 883 int ret;
f4bb2243
JA
884
885 td_set_runstate(td, TD_VERIFYING);
886
f4bb2243
JA
887 do {
888 if (td->terminate)
889 break;
890
891 gettimeofday(&t, NULL);
892 if (runtime_exceeded(td, &t))
893 break;
894
895 io_u = __get_io_u(td);
896 if (!io_u)
897 break;
898
645785e5
JA
899 if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
900 put_io_u(td, io_u);
901 break;
f4bb2243
JA
902 }
903
904 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
905 ret = io_u_queue(td, io_u);
906 if (ret) {
907 put_io_u(td, io_u);
908 td->error = ret;
909 break;
910 }
911
f0f3411b
JA
912 /*
913 * we have one pending to verify, do that while we are
914 * doing io on the next one
915 */
d32d9284
JA
916 if (async_do_verify(td, &v_io_u))
917 break;
f0f3411b 918
1ad72b11 919 ret = io_u_getevents(td, 1, 1, NULL);
f0f3411b
JA
920 if (ret != 1) {
921 if (ret < 0)
922 td->error = ret;
f4bb2243
JA
923 break;
924 }
925
f0f3411b 926 v_io_u = ev_to_iou(td->aio_events);
f4bb2243 927
645785e5 928 td->cur_off = v_io_u->offset + v_io_u->buflen;
f0f3411b
JA
929
930 /*
d32d9284 931 * if we can't submit more io, we need to verify now
f0f3411b 932 */
d32d9284
JA
933 if (queue_full(td) && async_do_verify(td, &v_io_u))
934 break;
935
f4bb2243
JA
936 } while (1);
937
d32d9284 938 async_do_verify(td, &v_io_u);
f0f3411b 939
f4bb2243
JA
940 if (td->cur_depth)
941 cleanup_pending_aio(td);
942
943 td_set_runstate(td, TD_RUNNING);
cfc702bd
JA
944}
945
43000118
JA
946static void do_async_io(struct thread_data *td)
947{
948 struct timeval s, e;
7889f07b 949 unsigned long usec;
43000118 950
49d2caab 951 while (td->this_io_bytes < td->io_size) {
43000118
JA
952 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
953 struct timespec *timeout;
2c83567e
JA
954 int ret, min_evts = 0;
955 struct io_u *io_u;
9e850933 956 unsigned int bytes_done;
43000118
JA
957
958 if (td->terminate)
959 break;
960
2c83567e 961 io_u = get_io_u(td);
7889f07b
JA
962 if (!io_u)
963 break;
43000118 964
57d753e3 965 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 966
2c83567e 967 ret = io_u_queue(td, io_u);
56b0eff0 968 if (ret) {
a3fdb993 969 put_io_u(td, io_u);
a592bd33 970 td->error = ret;
43000118
JA
971 break;
972 }
973
57d753e3
JA
974 gettimeofday(&io_u->issue_time, NULL);
975 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
2c83567e 976 if (td->cur_depth < td->aio_depth) {
43000118
JA
977 timeout = &ts;
978 min_evts = 0;
979 } else {
980 timeout = NULL;
981 min_evts = 1;
982 }
983
1ad72b11 984 ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
43000118 985 if (ret < 0) {
406e7b7c 986 td->error = ret;
43000118
JA
987 break;
988 } else if (!ret)
989 continue;
990
f0f3411b 991 bytes_done = ios_completed(td, ret);
43000118 992
98168d55
JA
993 /*
994 * the rate accounting is batched for now; it should work for
995 * batches of completions, except the very first one may look
996 * a little bursty
997 */
2c83567e 998 gettimeofday(&e, NULL);
43000118
JA
999 usec = utime_since(&s, &e);
1000
9e850933 1001 rate_throttle(td, usec, bytes_done);
43000118
JA
1002
1003 if (check_min_rate(td, &e)) {
1004 td->error = ENODATA;
1005 break;
1006 }
67903a2e
JA
1007
1008 if (runtime_exceeded(td, &e))
1009 break;
765d9223
JA
1010
1011 if (td->thinktime)
1012 usec_sleep(td->thinktime);
cdf92433
JA
1013
1014 if (should_fsync(td) && td->fsync_blocks &&
1015 (td->io_blocks % td->fsync_blocks) == 0)
1016 fsync(td->fd);
43000118 1017 }
56b0eff0 1018
2c83567e
JA
1019 if (td->cur_depth)
1020 cleanup_pending_aio(td);
4ac89145
JA
1021
1022 if (should_fsync(td))
1023 fsync(td->fd);
56b0eff0
JA
1024}
1025
1026static void cleanup_aio(struct thread_data *td)
1027{
254605cd
JA
1028 io_destroy(td->aio_ctx);
1029
43000118
JA
1030 if (td->aio_events)
1031 free(td->aio_events);
43000118
JA
1032}
1033
1034static int init_aio(struct thread_data *td)
1035{
254605cd 1036 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
1037 td->error = errno;
1038 return 1;
1039 }
1040
43000118 1041 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
1042 return 0;
1043}
1044
2c83567e
JA
1045static void cleanup_io_u(struct thread_data *td)
1046{
1047 struct list_head *entry, *n;
1048 struct io_u *io_u;
1049
1050 list_for_each_safe(entry, n, &td->io_u_freelist) {
1051 io_u = list_entry(entry, struct io_u, list);
1052
1053 list_del(&io_u->list);
2c83567e
JA
1054 free(io_u);
1055 }
6b71c826 1056
99c6704f
JA
1057 if (td->mem_type == MEM_MALLOC)
1058 free(td->orig_buffer);
1059 else if (td->mem_type == MEM_SHM) {
1060 struct shmid_ds sbuf;
1061
1062 shmdt(td->orig_buffer);
1063 shmctl(td->shm_id, IPC_RMID, &sbuf);
1064 }
2c83567e
JA
1065}
1066
99c6704f 1067static int init_io_u(struct thread_data *td)
2c83567e
JA
1068{
1069 struct io_u *io_u;
99c6704f 1070 int i, max_units, mem_size;
6b71c826 1071 char *p;
2c83567e
JA
1072
1073 if (!td->use_aio)
1074 max_units = 1;
1075 else
1076 max_units = td->aio_depth;
1077
7889f07b 1078 mem_size = td->max_bs * max_units + MASK;
99c6704f
JA
1079
1080 if (td->mem_type == MEM_MALLOC)
1081 td->orig_buffer = malloc(mem_size);
1082 else if (td->mem_type == MEM_SHM) {
1083 td->shm_id = shmget(IPC_PRIVATE, mem_size, IPC_CREAT | 0600);
1084 if (td->shm_id < 0) {
1085 td->error = errno;
1086 perror("shmget");
1087 return 1;
1088 }
1089
1090 td->orig_buffer = shmat(td->shm_id, NULL, 0);
1091 if (td->orig_buffer == (void *) -1) {
1092 td->error = errno;
1093 perror("shmat");
1094 return 1;
1095 }
1096 }
6b71c826 1097
2c83567e
JA
1098 INIT_LIST_HEAD(&td->io_u_freelist);
1099 INIT_LIST_HEAD(&td->io_u_busylist);
645785e5 1100 INIT_LIST_HEAD(&td->io_hist_list);
2c83567e 1101
99c6704f 1102 p = ALIGN(td->orig_buffer);
2c83567e
JA
1103 for (i = 0; i < max_units; i++) {
1104 io_u = malloc(sizeof(*io_u));
1105 memset(io_u, 0, sizeof(*io_u));
1106 INIT_LIST_HEAD(&io_u->list);
1107
7889f07b 1108 io_u->buf = p + td->max_bs * i;
2c83567e
JA
1109 list_add(&io_u->list, &td->io_u_freelist);
1110 }
99c6704f
JA
1111
1112 return 0;
2c83567e
JA
1113}
1114
02983297
JA
1115static int create_file(struct thread_data *td)
1116{
7889f07b 1117 unsigned long long left;
645785e5 1118 unsigned int bs;
02983297 1119 char *b;
645785e5 1120 int r;
02983297 1121
02983297
JA
1122 /*
1123 * unless specifically asked for overwrite, let normal io extend it
1124 */
62bb4285 1125 if (td_write(td) && !td->overwrite)
02983297
JA
1126 return 0;
1127
57d753e3
JA
1128 if (!td->file_size) {
1129 fprintf(stderr, "Need size for create\n");
1130 td->error = EINVAL;
1131 return 1;
1132 }
1133
42fd89a7
JA
1134 printf("Client%d: Laying out IO file\n", td->thread_number);
1135
02983297
JA
1136 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1137 if (td->fd < 0) {
1138 td->error = errno;
1139 return 1;
1140 }
1141
c94deb1c
JA
1142 if (ftruncate(td->fd, td->file_size) == -1) {
1143 td->error = errno;
1144 return 1;
1145 }
1146
49d2caab 1147 td->io_size = td->file_size;
7889f07b
JA
1148 b = malloc(td->max_bs);
1149 memset(b, 0, td->max_bs);
1150
1151 left = td->file_size;
1152 while (left) {
1153 bs = td->max_bs;
1154 if (bs > left)
1155 bs = left;
02983297 1156
7889f07b 1157 r = write(td->fd, b, bs);
02983297 1158
645785e5 1159 if (r == (int) bs) {
7889f07b 1160 left -= bs;
02983297 1161 continue;
7889f07b 1162 } else {
02983297
JA
1163 if (r < 0)
1164 td->error = errno;
1165 else
1166 td->error = EIO;
1167
1168 break;
1169 }
1170 }
1171
fc097bfe
JA
1172 if (td->create_fsync)
1173 fsync(td->fd);
1174
02983297
JA
1175 close(td->fd);
1176 td->fd = -1;
1177 free(b);
1178 return 0;
1179}
1180
1181static int file_exists(struct thread_data *td)
1182{
1183 struct stat st;
1184
1185 if (stat(td->file_name, &st) != -1)
1186 return 1;
1187
1188 return errno != ENOENT;
1189}
1190
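/*
 * Figure out how much io to do: block devices are sized via the
 * BLKGETSIZE64 ioctl, regular files via st_size. Per the patch subject,
 * a read job with no size= given just uses the detected file size; a
 * write job with no size defaults to 1GiB. io_size is then whatever
 * remains past file_offset.
 */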
c94deb1c 1191static int get_file_size(struct thread_data *td)
02983297 1192{
1193 unsigned long long bytes = 0;
02983297 1194 struct stat st;
c94deb1c
JA
1195
1196 if (fstat(td->fd, &st) == -1) {
1197 td->error = errno;
1198 return 1;
1199 }
1200
1201 /*
1202 * if this is a block device, get the size via the BLKGETSIZE64 ioctl.
1203 * try that for links as well, falling back to st.st_size if it fails
1204 */
1205 if (S_ISBLK(st.st_mode) || S_ISLNK(st.st_mode)) {
1206 if (ioctl(td->fd, BLKGETSIZE64, &bytes)) {
1207 if (S_ISBLK(st.st_mode)) {
1208 td->error = errno;
1209 return 1;
1210 } else
1211 bytes = st.st_size;
1212 }
1213 } else
1214 bytes = st.st_size;
1215
1216 if (td_read(td)) {
3ceeecf1
JA
1217 if (!td->file_size)
1218 td->file_size = bytes;
1219 else if (td->file_size > bytes)
c94deb1c
JA
1220 bytes = td->file_size;
1221 } else {
1222 if (!td->file_size)
1223 td->file_size = 1024 * 1024 * 1024;
1224
1225 bytes = td->file_size;
1226 }
1227
1228 if (td->file_offset > bytes) {
1229 fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
1230 return 1;
1231 }
1232
1233 td->io_size = bytes - td->file_offset;
1234 if (td->io_size == 0) {
1235 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
1236 td->error = EINVAL;
1237 return 1;
1238 }
1239
1240 return 0;
1241}
1242
1243static int setup_file(struct thread_data *td)
1244{
02983297
JA
1245 int flags = 0;
1246
1247 if (!file_exists(td)) {
1248 if (!td->create_file) {
1249 td->error = ENOENT;
1250 return 1;
1251 }
1252 if (create_file(td))
1253 return 1;
1254 }
1255
1256 if (td->odirect)
1257 flags |= O_DIRECT;
1258
1259 if (td_read(td))
1260 td->fd = open(td->file_name, flags | O_RDONLY);
1261 else {
1262 if (!td->overwrite)
1263 flags |= O_TRUNC;
74b4b5fb
JA
1264 if (td->sync_io)
1265 flags |= O_SYNC;
e8457004
JA
1266 if (td->verify)
1267 flags |= O_RDWR;
1268 else
1269 flags |= O_WRONLY;
02983297 1270
e8457004 1271 td->fd = open(td->file_name, flags | O_CREAT, 0600);
02983297
JA
1272 }
1273
1274 if (td->fd == -1) {
1275 td->error = errno;
1276 return 1;
1277 }
1278
c94deb1c 1279 if (get_file_size(td))
49d2caab 1280 return 1;
49d2caab 1281
62bb4285 1282 if (td_write(td) && ftruncate(td->fd, td->file_size) == -1) {
c94deb1c 1283 td->error = errno;
02983297
JA
1284 return 1;
1285 }
1286
b95799ca 1287 if (td->invalidate_cache) {
c94deb1c 1288 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
b95799ca
JA
1289 td->error = errno;
1290 return 1;
1291 }
1292 }
1293
02983297
JA
1294 return 0;
1295}
1296
d32d9284
JA
1297static void clear_io_state(struct thread_data *td)
1298{
9d0c6ca2
JA
1299 if (!td->use_aio)
1300 lseek(td->fd, 0, SEEK_SET);
1301
d32d9284 1302 td->cur_off = 0;
49d2caab
JA
1303 td->last_bytes = 0;
1304 td->stat_io_bytes = 0;
1305 td->this_io_bytes = 0;
1306
1307 if (td->file_map)
1308 memset(td->file_map, 0, td->num_maps * sizeof(long));
d32d9284
JA
1309}
1310
f6dcd824
JA
1311static void update_rusage_stat(struct thread_data *td)
1312{
1313 if (!td->runtime)
1314 return;
1315
1316 getrusage(RUSAGE_SELF, &td->ru_end);
1317
1318 td->usr_time += mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
1319 td->sys_time += mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
1320 td->ctx += td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
1321
1322
1323 memcpy(&td->ru_start, &td->ru_end, sizeof(td->ru_end));
1324}
1325
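/*
 * Per-job worker: sets up the io_u pool, cpu affinity, aio context and
 * ioprio as configured, signals readiness and waits for the go-ahead,
 * then runs the requested number of loops of sync or async io with
 * optional verification before writing out logs and marking itself
 * exited.
 */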
189873de 1326static void *thread_main(void *data)
892199bd 1327{
189873de 1328 struct thread_data *td = data;
02983297 1329 int ret = 1;
892199bd 1330
7292613b 1331 setsid();
892199bd
JA
1332 td->pid = getpid();
1333
99c6704f
JA
1334 if (init_io_u(td))
1335 goto err;
2c83567e 1336
18e0b78c
JA
1337 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1338 td->error = errno;
1339 goto err;
1340 }
1341
43000118
JA
1342 if (td->use_aio && init_aio(td))
1343 goto err;
1344
f737299d 1345 if (td->ioprio) {
892199bd
JA
1346 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1347 td->error = errno;
599002b3 1348 goto err;
892199bd
JA
1349 }
1350 }
1351
1352 sem_post(&startup_sem);
1353 sem_wait(&td->mutex);
43000118 1354
fc097bfe
JA
1355 if (!td->create_serialize && setup_file(td))
1356 goto err;
1357
49d2caab
JA
1358 if (init_random_state(td))
1359 goto err;
1360
293753bb 1361 while (td->loops--) {
f6dcd824
JA
1362 getrusage(RUSAGE_SELF, &td->ru_start);
1363 gettimeofday(&td->start, NULL);
1364 memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
293753bb
JA
1365
1366 if (td->ratemin)
1367 memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
7292613b 1368
d32d9284 1369 clear_io_state(td);
9d0c6ca2 1370 prune_io_piece_log(td);
fd1ae4c9 1371
b2de0ed2 1372 if (!td->use_aio)
b6794fbf 1373 do_sync_io(td);
b2de0ed2
JA
1374 else
1375 do_async_io(td);
1376
91fc5dc9
JA
1377 if (td->error)
1378 break;
1379
f6dcd824
JA
1380 td->runtime += mtime_since_now(&td->start);
1381 update_rusage_stat(td);
1382
b2de0ed2
JA
1383 if (!td->verify)
1384 continue;
cfc702bd 1385
b2de0ed2 1386 clear_io_state(td);
d32d9284 1387
91fc5dc9
JA
1388 if (!td->use_aio)
1389 do_sync_verify(td);
1390 else
1391 do_async_verify(td);
1392
1393 if (td->error)
1394 break;
b6794fbf 1395 }
7292613b 1396
892199bd 1397 ret = 0;
a0a9b35b
JA
1398
1399 if (td->bw_log)
1400 finish_log(td, td->bw_log, "bw");
1401 if (td->lat_log)
1402 finish_log(td, td->lat_log, "lat");
4ac89145 1403
98dd52d6 1404 if (exitall_on_terminate)
27c32a38 1405 terminate_threads(td->groupid);
98dd52d6 1406
892199bd 1407err:
7292613b
JA
1408 if (td->fd != -1) {
1409 close(td->fd);
1410 td->fd = -1;
1411 }
4ac89145
JA
1412 if (td->use_aio)
1413 cleanup_aio(td);
2c83567e 1414 cleanup_io_u(td);
599002b3 1415 if (ret) {
892199bd 1416 sem_post(&startup_sem);
599002b3
JA
1417 sem_wait(&td->mutex);
1418 }
40ef7f64 1419 td_set_runstate(td, TD_EXITED);
189873de
JA
1420 return NULL;
1421
1422}
1423
1424static void *fork_main(int shm_id, int offset)
1425{
1426 struct thread_data *td;
1427 void *data;
1428
1429 data = shmat(shm_id, NULL, 0);
1430 if (data == (void *) -1) {
1431 perror("shmat");
1432 return NULL;
1433 }
1434
1435 td = data + offset * sizeof(struct thread_data);
1436 thread_main(td);
4240cfa1 1437 shmdt(data);
892199bd
JA
1438 return NULL;
1439}
1440
57d753e3
JA
1441static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1442 double *mean, double *dev)
1443{
1444 double n;
1445
1446 if (is->samples == 0)
1447 return 0;
1448
1449 *min = is->min_val;
1450 *max = is->max_val;
1451
1452 n = (double) is->samples;
1453 *mean = (double) is->val / n;
1454 *dev = sqrt(((double) is->val_sq - n * (*mean) * (*mean)) / (n - 1));
1455 return 1;
1456}
1457
557e4102
JA
1458static void show_thread_status(struct thread_data *td,
1459 struct group_run_stats *rs)
892199bd
JA
1460{
1461 int prio, prio_class;
f6dcd824 1462 unsigned long min, max, bw = 0;
92b229ed 1463 double mean, dev, usr_cpu, sys_cpu;
892199bd 1464
49d2caab 1465 if (!td->io_bytes && !td->error)
213b446c
JA
1466 return;
1467
892199bd 1468 if (td->runtime)
49d2caab 1469 bw = td->io_bytes / td->runtime;
892199bd
JA
1470
1471 prio = td->ioprio & 0xff;
1472 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1473
f6dcd824 1474 printf("Client%d (g=%d): err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->groupid, td->error, td->io_bytes >> 20, bw, td->runtime);
fd1ae4c9 1475
57d753e3
JA
1476 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1477 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1478 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1479 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
557e4102
JA
1480 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev)) {
1481 double p_of_agg;
1482
1483 p_of_agg = mean * 100 / (double) rs->agg[td->ddir];
1484 printf(" bw (KiB/s) : min=%5lu, max=%5lu, per=%3.2f%%, avg=%5.02f, dev=%5.02f\n", min, max, p_of_agg, mean, dev);
1485 }
92b229ed
JA
1486
1487 if (td->runtime) {
f6dcd824
JA
1488 usr_cpu = (double) td->usr_time * 100 / (double) td->runtime;
1489 sys_cpu = (double) td->sys_time * 100 / (double) td->runtime;
92b229ed
JA
1490 } else {
1491 usr_cpu = 0;
1492 sys_cpu = 0;
1493 }
1494
f6dcd824 1495 printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, td->ctx);
892199bd
JA
1496}
1497
3f39453a 1498static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 1499 int t_rate, int m_rate)
3f39453a 1500{
3f39453a
JA
1501 printf("Threads now running: %d", nr_running);
1502 if (m_rate || t_rate)
1503 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
1504 printf(" : [%s]\r", run_str);
1505 fflush(stdout);
3f39453a
JA
1506}
1507
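/*
 * Per-thread character in the status line: 'P' not yet started,
 * 'C' created, 'R'/'r' sequential/random read, 'W'/'w' sequential/
 * random write, 'V' verifying, 'E' exited, '_' reaped.
 */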
1508static void check_str_update(struct thread_data *td, int n, int t, int m)
1509{
1510 char c = run_str[td->thread_number - 1];
1511
1512 if (td->runstate == td->old_runstate)
1513 return;
1514
1515 switch (td->runstate) {
1516 case TD_REAPED:
1517 c = '_';
1518 break;
f4bb2243
JA
1519 case TD_EXITED:
1520 c = 'E';
1521 break;
40ef7f64 1522 case TD_RUNNING:
af678352
JA
1523 if (td_read(td)) {
1524 if (td->sequential)
1525 c = 'R';
1526 else
1527 c = 'r';
1528 } else {
1529 if (td->sequential)
1530 c = 'W';
1531 else
1532 c = 'w';
1533 }
40ef7f64
JA
1534 break;
1535 case TD_VERIFYING:
1536 c = 'V';
1537 break;
1538 case TD_CREATED:
1539 c = 'C';
1540 break;
1541 case TD_NOT_CREATED:
1542 c = 'P';
1543 break;
1544 default:
1545 printf("state %d\n", td->runstate);
1546 }
1547
1548 run_str[td->thread_number - 1] = c;
1549 print_thread_status(td, n, t, m);
1550 td->old_runstate = td->runstate;
1551}
1552
213b446c 1553static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1554{
213b446c 1555 int i;
02bdd9ba 1556
3f39453a
JA
1557 /*
1558 * reap exited threads (TD_EXITED -> TD_REAPED)
1559 */
02bdd9ba
JA
1560 for (i = 0; i < thread_number; i++) {
1561 struct thread_data *td = &threads[i];
1562
40ef7f64
JA
1563 check_str_update(td, *nr_running, *t_rate, *m_rate);
1564
213b446c
JA
1565 if (td->runstate != TD_EXITED)
1566 continue;
02bdd9ba 1567
40ef7f64 1568 td_set_runstate(td, TD_REAPED);
189873de
JA
1569
1570 if (td->use_thread) {
1571 long ret;
1572
1573 if (pthread_join(td->thread, (void *) &ret))
1574 perror("thread_join");
1575 } else
1576 waitpid(td->pid, NULL, 0);
1577
213b446c
JA
1578 (*nr_running)--;
1579 (*m_rate) -= td->ratemin;
1580 (*t_rate) -= td->rate;
40ef7f64 1581 check_str_update(td, *nr_running, *t_rate, *m_rate);
213b446c 1582 }
02bdd9ba
JA
1583}
1584
fc24389f
JA
1585static void run_threads(char *argv[])
1586{
be33abe4 1587 struct timeval genesis;
fc24389f
JA
1588 struct thread_data *td;
1589 unsigned long spent;
2a81240d 1590 int i, todo, nr_running, m_rate, t_rate, nr_started;
fc24389f 1591
fc24389f
JA
1592 printf("Starting %d threads\n", thread_number);
1593 fflush(stdout);
1594
7292613b
JA
1595 signal(SIGINT, sig_handler);
1596
fc24389f 1597 todo = thread_number;
02bdd9ba 1598 nr_running = 0;
2a81240d 1599 nr_started = 0;
213b446c 1600 m_rate = t_rate = 0;
fc24389f 1601
8bdcfab5
JA
1602 for (i = 0; i < thread_number; i++) {
1603 td = &threads[i];
1604
fc097bfe
JA
1605 if (!td->create_serialize)
1606 continue;
1607
8bdcfab5
JA
1608 /*
1609 * do file setup here so it happens sequentially,
1610 * we don't want X number of threads getting their
1611 * client data interspersed on disk
1612 */
1613 if (setup_file(td)) {
40ef7f64 1614 td_set_runstate(td, TD_REAPED);
8bdcfab5
JA
1615 todo--;
1616 }
1617 }
1618
1619 gettimeofday(&genesis, NULL);
1620
213b446c 1621 while (todo) {
3f39453a
JA
1622 /*
1623 * create threads (TD_NOT_CREATED -> TD_CREATED)
1624 */
fc24389f
JA
1625 for (i = 0; i < thread_number; i++) {
1626 td = &threads[i];
1627
02bdd9ba 1628 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1629 continue;
1630
213b446c
JA
1631 /*
1632 * never got a chance to start, killed by other
1633 * thread for some reason
1634 */
1635 if (td->terminate) {
1636 todo--;
1637 continue;
1638 }
1639
fc24389f 1640 if (td->start_delay) {
be33abe4 1641 spent = mtime_since_now(&genesis);
fc24389f
JA
1642
1643 if (td->start_delay * 1000 > spent)
1644 continue;
1645 }
1646
2a81240d 1647 if (td->stonewall && (nr_started || nr_running))
ea6f96a2 1648 break;
2a81240d 1649
40ef7f64
JA
1650 td_set_runstate(td, TD_CREATED);
1651 check_str_update(td, nr_running, t_rate, m_rate);
fc24389f
JA
1652 sem_init(&startup_sem, 1, 1);
1653 todo--;
2a81240d 1654 nr_started++;
fc24389f 1655
189873de
JA
1656 if (td->use_thread) {
1657 if (pthread_create(&td->thread, NULL, thread_main, td)) {
1658 perror("thread_create");
1659 nr_started--;
1660 }
1661 } else {
1662 if (fork())
1663 sem_wait(&startup_sem);
1664 else {
1665 fork_main(shm_id, i);
1666 exit(0);
1667 }
fc24389f
JA
1668 }
1669 }
1670
3f39453a 1671 /*
e8457004 1672 * start created threads (TD_CREATED -> TD_RUNNING)
3f39453a 1673 */
fc24389f
JA
1674 for (i = 0; i < thread_number; i++) {
1675 struct thread_data *td = &threads[i];
1676
3f39453a
JA
1677 if (td->runstate != TD_CREATED)
1678 continue;
1679
40ef7f64 1680 td_set_runstate(td, TD_RUNNING);
3f39453a 1681 nr_running++;
2a81240d 1682 nr_started--;
3f39453a
JA
1683 m_rate += td->ratemin;
1684 t_rate += td->rate;
40ef7f64 1685 check_str_update(td, nr_running, t_rate, m_rate);
3f39453a 1686 sem_post(&td->mutex);
fc24389f
JA
1687 }
1688
e8457004
JA
1689 for (i = 0; i < thread_number; i++) {
1690 struct thread_data *td = &threads[i];
1691
b48889bb
JA
1692 if (td->runstate != TD_RUNNING &&
1693 td->runstate != TD_VERIFYING)
e8457004
JA
1694 continue;
1695
40ef7f64 1696 check_str_update(td, nr_running, t_rate, m_rate);
e8457004
JA
1697 }
1698
213b446c 1699 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1700
fc24389f
JA
1701 if (todo)
1702 usleep(100000);
1703 }
02bdd9ba
JA
1704
1705 while (nr_running) {
213b446c 1706 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1707 usleep(10000);
1708 }
fc24389f
JA
1709}
1710
0d80f40d 1711static void show_group_stats(struct group_run_stats *rs, int id)
8867c0a8 1712{
0d80f40d
JA
1713 printf("\nRun status group %d:\n", id);
1714
1715 if (rs->max_run[DDIR_READ])
1716 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[0], rs->agg[0], rs->min_bw[0], rs->max_bw[0], rs->min_run[0], rs->max_run[0]);
1717 if (rs->max_run[DDIR_WRITE])
1718 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", rs->io_mb[1], rs->agg[1], rs->min_bw[1], rs->max_bw[1], rs->min_run[1], rs->max_run[1]);
1719}
1720
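/*
 * Fold per-thread results into per-group stats: slowest/fastest
 * runtime and bandwidth per data direction, total MiB moved, and the
 * aggregate bandwidth in KiB/s (io_mb * 1024 KiB over the longest
 * runtime in msec, times 1000).
 */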
1721static void show_run_stats(void)
1722{
1723 struct group_run_stats *runstats, *rs;
557e4102 1724 struct thread_data *td;
8867c0a8
JA
1725 int i;
1726
0d80f40d
JA
1727 runstats = malloc(sizeof(struct group_run_stats) * (groupid + 1));
1728
1729 for (i = 0; i < groupid + 1; i++) {
1730 rs = &runstats[i];
1731
f6dcd824 1732 memset(rs, 0, sizeof(*rs));
0d80f40d
JA
1733 rs->min_bw[0] = rs->min_run[0] = ~0UL;
1734 rs->min_bw[1] = rs->min_run[1] = ~0UL;
0d80f40d
JA
1735 }
1736
1737 for (i = 0; i < thread_number; i++) {
0d80f40d
JA
1738 unsigned long bw = 0;
1739
557e4102
JA
1740 td = &threads[i];
1741
1742 if (td->error)
1743 continue;
1744
0d80f40d
JA
1745 rs = &runstats[td->groupid];
1746
557e4102
JA
1747 if (td->runtime < rs->min_run[td->ddir])
1748 rs->min_run[td->ddir] = td->runtime;
1749 if (td->runtime > rs->max_run[td->ddir])
1750 rs->max_run[td->ddir] = td->runtime;
0d80f40d 1751
557e4102
JA
1752 if (td->runtime)
1753 bw = td->io_bytes / td->runtime;
1754 if (bw < rs->min_bw[td->ddir])
1755 rs->min_bw[td->ddir] = bw;
1756 if (bw > rs->max_bw[td->ddir])
1757 rs->max_bw[td->ddir] = bw;
0d80f40d 1758
557e4102 1759 rs->io_mb[td->ddir] += td->io_bytes >> 20;
0d80f40d 1760 }
9d489c62 1761
0d80f40d
JA
1762 for (i = 0; i < groupid + 1; i++) {
1763 rs = &runstats[i];
1764
1765 if (rs->max_run[0])
1766 rs->agg[0] = (rs->io_mb[0]*1024*1000) / rs->max_run[0];
1767 if (rs->max_run[1])
1768 rs->agg[1] = (rs->io_mb[1]*1024*1000) / rs->max_run[1];
0d80f40d 1769 }
557e4102
JA
1770
1771 for (i = 0; i < thread_number; i++) {
1772 td = &threads[i];
1773 rs = &runstats[td->groupid];
1774
1775 if (!td->error)
1776 show_thread_status(td, rs);
1777 }
9d489c62
JA
1778
1779 for (i = 0; i < groupid + 1; i++)
1780 show_group_stats(&runstats[i], i);
0d80f40d
JA
1781}
1782
1783int main(int argc, char *argv[])
1784{
27c32a38 1785 memset(run_str, 0, sizeof(run_str));
5961d92c 1786
27c32a38 1787 if (parse_options(argc, argv))
5961d92c 1788 return 1;
7dd1389e 1789
4240cfa1
JA
1790 if (!thread_number) {
1791 printf("Nothing to do\n");
1792 return 1;
1793 }
7dd1389e 1794
fc24389f 1795 run_threads(argv);
0d80f40d 1796 show_run_stats();
fc24389f 1797
892199bd
JA
1798 return 0;
1799}