[PATCH] fio: use one big allocation for io units
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
e128065d 32#include <math.h>
02983297 33#include <limits.h>
892199bd
JA
34#include <sys/time.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/wait.h>
38#include <semaphore.h>
39#include <sys/ipc.h>
40#include <sys/shm.h>
41#include <asm/unistd.h>
42
2c83567e
JA
43#include "list.h"
44
4240cfa1
JA
45#define MAX_JOBS (1024)
46
892199bd
JA
47/*
48 * assume we don't have _get either, if _set isn't defined
49 */
50#ifndef __NR_ioprio_set
892199bd
JA
51#if defined(__i386__)
52#define __NR_ioprio_set 289
53#define __NR_ioprio_get 290
54#elif defined(__powerpc__) || defined(__powerpc64__)
55#define __NR_ioprio_set 273
56#define __NR_ioprio_get 274
57#elif defined(__x86_64__)
58#define __NR_ioprio_set 251
59#define __NR_ioprio_get 252
60#elif defined(__ia64__)
61#define __NR_ioprio_set 1274
62#define __NR_ioprio_get 1275
63#elif defined(__alpha__)
64#define __NR_ioprio_set 442
65#define __NR_ioprio_get 443
66#elif defined(__s390x__) || defined(__s390__)
67#define __NR_ioprio_set 282
68#define __NR_ioprio_get 283
69#else
70#error "Unsupported arch"
71#endif
b95799ca 72#endif
892199bd 73
b95799ca
JA
74#ifndef __NR_fadvise64
75#if defined(__i386__)
76#define __NR_fadvise64 250
77#elif defined(__powerpc__) || defined(__powerpc64__)
78#define __NR_fadvise64 233
79#elif defined(__x86_64__)
80#define __NR_fadvise64 221
81#elif defined(__ia64__)
82#define __NR_fadvise64 1234
83#elif defined(__alpha__)
84#define __NR_fadvise64 413
85#elif defined(__s390x__) || defined(__s390__)
86#define __NR_fadvise64 253
87#else
88#error "Unsupported arch"
89#endif
892199bd
JA
90#endif
91
/*
 * Invoke the ioprio_set system call through syscall(), using the
 * __NR_ioprio_set number. The three arguments are passed through
 * untouched and the syscall() result is handed back to the caller.
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return (int) rc;
}
96
b95799ca
JA
/*
 * we want fadvise64 really, but it's so tangled... later
 *
 * Returns 0 on success. On failure returns -1 with errno set, so callers
 * can use the usual "< 0, then look at errno" convention.
 */
static int fadvise(int fd, loff_t offset, size_t len, int advice)
{
#if 0
	return syscall(__NR_fadvise64, fd, offset, offset >> 32, len, advice);
#else
	/*
	 * posix_fadvise() reports failure by returning the error number
	 * itself; it does not return -1 and does not set errno. Translate
	 * that here so a failure is not silently taken for success.
	 */
	int err = posix_fadvise(fd, (off_t) offset, len, advice);

	if (err) {
		errno = err;
		return -1;
	}

	return 0;
#endif
}
108
892199bd
JA
/*
 * Values for the 'which' argument of ioprio_set(). Numbering starts at 1.
 */
enum {
	IOPRIO_WHO_PROCESS = 1,	/* the only value used in this file */
	IOPRIO_WHO_PGRP,
	IOPRIO_WHO_USER,
};
114
115#define IOPRIO_CLASS_SHIFT 13
116
892199bd
JA
117#define MASK (4095)
118
4240cfa1
JA
119#define DEF_BS (4096)
120#define DEF_TIMEOUT (30)
121#define DEF_RATE_CYCLE (1000)
122#define DEF_ODIRECT (1)
123#define DEF_SEQUENTIAL (1)
4240cfa1 124#define DEF_RAND_REPEAT (1)
02983297
JA
125#define DEF_OVERWRITE (0)
126#define DEF_CREATE (1)
b95799ca 127#define DEF_INVALIDATE (1)
4240cfa1
JA
128
129#define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
892199bd 130
4240cfa1 131static int repeatable = DEF_RAND_REPEAT;
02bdd9ba 132static int rate_quit = 1;
a0a9b35b
JA
133static int write_lat_log;
134static int write_bw_log;
892199bd 135
892199bd 136static int thread_number;
7dd1389e 137static char *ini_file;
892199bd 138
8867c0a8
JA
139static int max_jobs = MAX_JOBS;
140
8dbff0b1
JA
141static char run_str[MAX_JOBS + 1];
142
892199bd
JA
143static int shm_id;
144
4240cfa1
JA
/*
 * Data direction of a job, stored in thread_data.ddir.
 */
enum {
	DDIR_READ = 0,	/* tested by the td_read() macro */
	DDIR_WRITE,
};
892199bd 149
02bdd9ba
JA
/*
 * thread life cycle, stored in thread_data.runstate
 */
enum {
	TD_NOT_CREATED = 0,	/* value of a freshly zeroed thread_data */
	TD_CREATED,
	TD_STARTED,
	TD_EXITED,		/* set by thread_main() just before it returns */
	TD_REAPED,
};
160
2c83567e
JA
161/*
162 * The io unit
163 */
164struct io_u {
165 struct iocb iocb;
57d753e3 166 struct timeval start_time;
2c83567e
JA
167 struct timeval issue_time;
168
2c83567e
JA
169 char *buf;
170 unsigned int buflen;
4ac89145 171 unsigned long long offset;
2c83567e
JA
172
173 struct list_head list;
174};
175
57d753e3
JA
/*
 * Running statistics over a series of samples, updated by
 * add_stat_sample() and summarised by calc_lat().
 */
struct io_stat {
	unsigned long val;	/* sum of all samples */
	unsigned long val_sq;	/* sum of the squares of all samples */
	unsigned long max_val;	/* largest sample seen */
	unsigned long min_val;	/* smallest sample seen */
	unsigned long samples;	/* number of samples added */
};
183
a0a9b35b
JA
/*
 * One logged data point.
 */
struct io_sample {
	unsigned long time;	/* msec since the job's start time */
	unsigned long val;	/* the logged value */
};
188
/*
 * Growable array of samples, filled by add_log_sample() and written
 * out by finish_log().
 */
struct io_log {
	unsigned long nr_samples;	/* entries currently used */
	unsigned long max_samples;	/* entries allocated in log[] */
	struct io_sample *log;		/* heap array of max_samples entries */
};
194
02983297
JA
195#define td_read(td) ((td)->ddir == DDIR_READ)
196#define should_fsync(td) (!td_read(td) && !(td)->odirect)
197
892199bd
JA
198struct thread_data {
199 char file_name[256];
200 int thread_number;
201 int error;
202 int fd;
892199bd 203 pid_t pid;
6b71c826 204 char *orig_buffer;
4240cfa1 205 volatile int terminate;
02bdd9ba 206 volatile int runstate;
f737299d
JA
207 unsigned int ddir;
208 unsigned int ioprio;
209 unsigned int sequential;
210 unsigned int bs;
211 unsigned int odirect;
212 unsigned int delay_sleep;
4240cfa1 213 unsigned int fsync_blocks;
fc24389f 214 unsigned int start_delay;
47d45203 215 unsigned int timeout;
43000118 216 unsigned int use_aio;
02983297
JA
217 unsigned int create_file;
218 unsigned int overwrite;
b95799ca 219 unsigned int invalidate_cache;
02983297
JA
220 unsigned long long file_size;
221 unsigned long long file_offset;
74b4b5fb 222 unsigned int sync_io;
18e0b78c 223 cpu_set_t cpumask;
86184d14 224
63a09e51
JA
225 off_t cur_off;
226
254605cd 227 io_context_t aio_ctx;
43000118 228 unsigned int aio_depth;
43000118 229 struct io_event *aio_events;
2c83567e
JA
230
231 unsigned int cur_depth;
232 struct list_head io_u_freelist;
233 struct list_head io_u_busylist;
43000118 234
7dd1389e 235 unsigned int rate;
4240cfa1
JA
236 unsigned int ratemin;
237 unsigned int ratecycle;
238 unsigned long rate_usec_cycle;
239 long rate_pending_usleep;
240 unsigned long rate_blocks;
241 struct timeval lastrate;
86184d14 242
892199bd
JA
243 unsigned long runtime; /* sec */
244 unsigned long blocks;
4240cfa1 245 unsigned long io_blocks;
892199bd
JA
246 unsigned long last_block;
247 sem_t mutex;
892199bd
JA
248 struct drand48_data random_state;
249
250 /*
e128065d 251 * bandwidth and latency stats
892199bd 252 */
57d753e3
JA
253 struct io_stat clat_stat; /* completion latency */
254 struct io_stat slat_stat; /* submission latency */
255
256 struct io_stat bw_stat; /* bandwidth stats */
fd1ae4c9 257 unsigned long stat_io_blocks;
fd1ae4c9 258 struct timeval stat_sample_time;
4240cfa1 259
a0a9b35b
JA
260 struct io_log *lat_log;
261 struct io_log *bw_log;
262
4240cfa1 263 struct timeval start;
892199bd
JA
264};
265
266static struct thread_data *threads;
47d45203 267static struct thread_data def_thread;
892199bd
JA
268
269static sem_t startup_sem;
270
5c24b2c4 271static void sig_handler(int sig)
892199bd
JA
272{
273 int i;
274
213b446c
JA
275 for (i = 0; i < thread_number; i++) {
276 struct thread_data *td = &threads[i];
277
278 td->terminate = 1;
279 td->start_delay = 0;
280 }
02bdd9ba
JA
281}
282
5c24b2c4 283static int init_random_state(struct thread_data *td)
892199bd
JA
284{
285 unsigned long seed = 123;
286
287 if (td->sequential)
288 return 0;
289
290 if (!repeatable) {
291 int fd = open("/dev/random", O_RDONLY);
292
293 if (fd == -1) {
294 td->error = errno;
295 return 1;
296 }
297
7dd1389e 298 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
892199bd
JA
299 td->error = EIO;
300 close(fd);
301 return 1;
302 }
303
304 close(fd);
305 }
306
307 srand48_r(seed, &td->random_state);
308 return 0;
309}
310
/*
 * Microseconds elapsed from *s to *e.
 *
 * Computed in integer arithmetic. If *e lies before *s the result is 0
 * rather than the conversion of a negative value to unsigned.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += e->tv_usec - s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) usec;
}
326
/*
 * Milliseconds elapsed from *s to *e, rounded down.
 *
 * Computed in integer arithmetic. If *e lies before *s the result is 0
 * rather than the conversion of a negative value to unsigned.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += e->tv_usec - s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) (usec / 1000);
}
343
be33abe4
JA
/*
 * Milliseconds elapsed between *s and the current time of day.
 */
static unsigned long mtime_since_now(struct timeval *s)
{
	struct timeval now;
	unsigned long msec;

	gettimeofday(&now, NULL);
	msec = mtime_since(s, &now);

	return msec;
}
351
98168d55
JA
/*
 * Convert the timeval *s to a plain millisecond count.
 */
static inline unsigned long msec_now(struct timeval *s)
{
	return (s->tv_usec / 1000) + (s->tv_sec * 1000);
}
356
4ac89145 357static unsigned long long get_next_offset(struct thread_data *td)
892199bd 358{
4ac89145 359 unsigned long long b;
892199bd
JA
360 long r;
361
362 if (!td->sequential) {
363 lrand48_r(&td->random_state, &r);
364 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
365 } else {
366 b = td->last_block;
367 td->last_block++;
368 }
369
02983297 370 return b * td->bs + td->file_offset;
892199bd
JA
371}
372
57d753e3
JA
373static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
374 unsigned long val)
892199bd 375{
57d753e3
JA
376 if (val > is->max_val)
377 is->max_val = val;
378 if (val < is->min_val)
379 is->min_val = val;
380
381 is->val += val;
382 is->val_sq += val * val;
383 is->samples++;
384}
fd1ae4c9 385
a0a9b35b
JA
386static void add_log_sample(struct thread_data *td, struct io_log *log,
387 unsigned long val)
388{
389 if (log->nr_samples == log->max_samples) {
390 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
391
392 log->log = realloc(log->log, new_size);
393 log->max_samples <<= 1;
394 }
395
396 log->log[log->nr_samples].val = val;
397 log->log[log->nr_samples].time = mtime_since_now(&td->start);
398 log->nr_samples++;
399}
400
57d753e3
JA
401static void add_clat_sample(struct thread_data *td, unsigned long msec)
402{
403 add_stat_sample(td, &td->clat_stat, msec);
a0a9b35b
JA
404
405 if (td->lat_log)
406 add_log_sample(td, td->lat_log, msec);
57d753e3 407}
fd1ae4c9 408
57d753e3
JA
409static void add_slat_sample(struct thread_data *td, unsigned long msec)
410{
411 add_stat_sample(td, &td->slat_stat, msec);
412}
fd1ae4c9 413
57d753e3
JA
414static void add_bw_sample(struct thread_data *td, unsigned long msec)
415{
416 unsigned long spent = mtime_since_now(&td->stat_sample_time);
417 unsigned long rate;
418
419 if (spent < 500)
420 return;
421
422 rate = ((td->io_blocks - td->stat_io_blocks) * td->bs) / spent;
423 add_stat_sample(td, &td->bw_stat, rate);
424
a0a9b35b
JA
425 if (td->bw_log)
426 add_log_sample(td, td->bw_log, rate);
427
57d753e3
JA
428 gettimeofday(&td->stat_sample_time, NULL);
429 td->stat_io_blocks = td->io_blocks;
892199bd
JA
430}
431
/*
 * Sleep for usec microseconds, resuming after signal interruptions.
 * Zero or negative delays return immediately.
 *
 * The delay is split into seconds and nanoseconds: nanosleep() rejects
 * a tv_nsec of one second or more, so putting the whole delay into
 * tv_nsec made every sleep of >= 1 second return without sleeping.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000;

	/* on EINTR rem holds the unslept remainder, seconds included */
	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}
446
5c24b2c4 447static void rate_throttle(struct thread_data *td, unsigned long time_spent)
86184d14 448{
4240cfa1
JA
449 if (!td->rate)
450 return;
451
86184d14
JA
452 if (time_spent < td->rate_usec_cycle) {
453 unsigned long s = td->rate_usec_cycle - time_spent;
454
455 td->rate_pending_usleep += s;
fad86e6a 456 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
457 usec_sleep(td->rate_pending_usleep);
458 td->rate_pending_usleep = 0;
459 }
4240cfa1 460 } else {
42b2b9fe
JA
461 long overtime = time_spent - td->rate_usec_cycle;
462
4240cfa1
JA
463 td->rate_pending_usleep -= overtime;
464 }
465}
466
5c24b2c4 467static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 468{
7607bc6b 469 unsigned long spent;
4240cfa1
JA
470 unsigned long rate;
471
472 /*
473 * allow a 2 second settle period in the beginning
474 */
7607bc6b 475 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
476 return 0;
477
478 /*
479 * if rate blocks is set, sample is running
480 */
481 if (td->rate_blocks) {
482 spent = mtime_since(&td->lastrate, now);
483 if (spent < td->ratecycle)
484 return 0;
485
486 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
487 if (rate < td->ratemin) {
488 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 489 if (rate_quit)
e6402082 490 sig_handler(0);
4240cfa1
JA
491 return 1;
492 }
86184d14 493 }
4240cfa1
JA
494
495 td->rate_blocks = td->io_blocks;
496 memcpy(&td->lastrate, now, sizeof(*now));
497 return 0;
86184d14
JA
498}
499
67903a2e
JA
500static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
501{
502 if (mtime_since(&td->start, t) >= td->timeout * 1000)
503 return 1;
504
505 return 0;
506}
507
2c83567e
JA
508static void put_io_u(struct thread_data *td, struct io_u *io_u)
509{
510 list_del(&io_u->list);
511 list_add(&io_u->list, &td->io_u_freelist);
512 td->cur_depth--;
513}
514
515static struct io_u *get_io_u(struct thread_data *td)
516{
517 struct io_u *io_u;
518
519 if (list_empty(&td->io_u_freelist))
520 return NULL;
521
522 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
523 list_del(&io_u->list);
524 list_add(&io_u->list, &td->io_u_busylist);
525
526 io_u->offset = get_next_offset(td);
527
528 if (td->use_aio) {
529 if (td_read(td))
530 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
531 else
532 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
533 }
534
57d753e3 535 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
536 td->cur_depth++;
537 return io_u;
538}
539
43000118 540static void do_sync_io(struct thread_data *td)
892199bd 541{
86184d14 542 unsigned long blocks, msec, usec;
2c83567e 543 struct timeval e;
892199bd 544
63a09e51
JA
545 td->cur_off = 0;
546
892199bd 547 for (blocks = 0; blocks < td->blocks; blocks++) {
2c83567e 548 struct io_u *io_u;
892199bd
JA
549 int ret;
550
551 if (td->terminate)
552 break;
553
2c83567e
JA
554 io_u = get_io_u(td);
555
63a09e51
JA
556 if (td->cur_off != io_u->offset) {
557 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
558 td->error = errno;
559 break;
560 }
892199bd
JA
561 }
562
563 if (td->delay_sleep)
86184d14 564 usec_sleep(td->delay_sleep);
892199bd 565
02983297 566 if (td_read(td))
2c83567e 567 ret = read(td->fd, io_u->buf, io_u->buflen);
892199bd 568 else
2c83567e 569 ret = write(td->fd, io_u->buf, io_u->buflen);
892199bd 570
2c83567e 571 if (ret < (int) io_u->buflen) {
892199bd
JA
572 if (ret == -1)
573 td->error = errno;
574 break;
575 }
576
4240cfa1 577 td->io_blocks++;
63a09e51 578 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1
JA
579
580 if (should_fsync(td) && td->fsync_blocks &&
581 (td->io_blocks % td->fsync_blocks) == 0)
582 fsync(td->fd);
583
86184d14
JA
584 gettimeofday(&e, NULL);
585
57d753e3 586 usec = utime_since(&io_u->start_time, &e);
86184d14 587
4240cfa1 588 rate_throttle(td, usec);
892199bd 589
4240cfa1
JA
590 if (check_min_rate(td, &e)) {
591 td->error = ENODATA;
592 break;
593 }
892199bd 594
4240cfa1 595 msec = usec / 1000;
57d753e3
JA
596 add_clat_sample(td, msec);
597 add_bw_sample(td, msec);
67903a2e
JA
598
599 if (runtime_exceeded(td, &e))
600 break;
2c83567e
JA
601
602 put_io_u(td, io_u);
892199bd
JA
603 }
604
4240cfa1 605 if (should_fsync(td))
892199bd 606 fsync(td->fd);
892199bd 607}
43000118 608
2c83567e 609static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 610{
2c83567e 611 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
612 int ret;
613
614 do {
254605cd 615 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
616 if (ret == 1)
617 return 0;
a592bd33 618 else if (ret == EAGAIN)
56b0eff0 619 usleep(100);
a592bd33
JA
620 else if (ret == EINTR)
621 continue;
56b0eff0
JA
622 else
623 break;
624 } while (1);
625
a592bd33 626 return ret;
56b0eff0
JA
627}
628
98168d55 629#define iocb_time(iocb) ((unsigned long) (iocb)->data)
2c83567e
JA
630#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
631
632static void ios_completed(struct thread_data *td, int nr)
633{
634 unsigned long msec;
635 struct io_u *io_u;
636 struct timeval e;
637 int i;
638
639 gettimeofday(&e, NULL);
640
641 for (i = 0; i < nr; i++) {
642 td->io_blocks++;
643
644 io_u = ev_to_iou(td->aio_events + i);
645
646 msec = mtime_since(&io_u->issue_time, &e);
647
57d753e3
JA
648 add_clat_sample(td, msec);
649 add_bw_sample(td, msec);
2c83567e
JA
650
651 put_io_u(td, io_u);
652 }
653}
654
655static void cleanup_pending_aio(struct thread_data *td)
656{
657 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
658 struct list_head *entry, *n;
659 struct io_u *io_u;
660 int r;
661
662 /*
663 * get immediately available events, if any
664 */
665 r = io_getevents(td->aio_ctx, 0, td->cur_depth, td->aio_events, &ts);
666 if (r > 0)
667 ios_completed(td, r);
668
669 /*
670 * now cancel remaining active events
671 */
672 list_for_each_safe(entry, n, &td->io_u_busylist) {
673 io_u = list_entry(entry, struct io_u, list);
674
675 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
676 if (!r)
677 put_io_u(td, io_u);
678 }
679
680 if (td->cur_depth) {
681 r = io_getevents(td->aio_ctx, td->cur_depth, td->cur_depth, td->aio_events, NULL);
682 if (r > 0)
683 ios_completed(td, r);
684 }
685}
98168d55 686
43000118
JA
687static void do_async_io(struct thread_data *td)
688{
689 struct timeval s, e;
2c83567e 690 unsigned long blocks, usec;
43000118 691
43000118
JA
692 for (blocks = 0; blocks < td->blocks; blocks++) {
693 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
694 struct timespec *timeout;
2c83567e
JA
695 int ret, min_evts = 0;
696 struct io_u *io_u;
43000118
JA
697
698 if (td->terminate)
699 break;
700
701 if (td->delay_sleep)
702 usec_sleep(td->delay_sleep);
703
2c83567e 704 io_u = get_io_u(td);
43000118 705
57d753e3 706 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 707
2c83567e 708 ret = io_u_queue(td, io_u);
56b0eff0 709 if (ret) {
a3fdb993 710 put_io_u(td, io_u);
a592bd33 711 td->error = ret;
43000118
JA
712 break;
713 }
714
57d753e3
JA
715 gettimeofday(&io_u->issue_time, NULL);
716 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
717
2c83567e 718 if (td->cur_depth < td->aio_depth) {
43000118
JA
719 timeout = &ts;
720 min_evts = 0;
721 } else {
722 timeout = NULL;
723 min_evts = 1;
724 }
725
2c83567e 726 ret = io_getevents(td->aio_ctx, min_evts, td->cur_depth, td->aio_events, timeout);
43000118
JA
727 if (ret < 0) {
728 td->error = errno;
729 break;
730 } else if (!ret)
731 continue;
732
2c83567e 733 ios_completed(td, ret);
43000118 734
4ac89145
JA
735 if (should_fsync(td) && td->fsync_blocks &&
736 (td->io_blocks % td->fsync_blocks) == 0)
737 fsync(td->fd);
738
98168d55
JA
739 /*
740 * the rate is batched for now, it should work for batches
741 * of completions except the very first one which may look
742 * a little bursty
743 */
2c83567e 744 gettimeofday(&e, NULL);
43000118
JA
745 usec = utime_since(&s, &e);
746
747 rate_throttle(td, usec);
748
749 if (check_min_rate(td, &e)) {
750 td->error = ENODATA;
751 break;
752 }
67903a2e
JA
753
754 if (runtime_exceeded(td, &e))
755 break;
43000118 756 }
56b0eff0 757
2c83567e
JA
758 if (td->cur_depth)
759 cleanup_pending_aio(td);
4ac89145
JA
760
761 if (should_fsync(td))
762 fsync(td->fd);
56b0eff0
JA
763}
764
765static void cleanup_aio(struct thread_data *td)
766{
254605cd
JA
767 io_destroy(td->aio_ctx);
768
43000118
JA
769 if (td->aio_events)
770 free(td->aio_events);
43000118
JA
771}
772
773static int init_aio(struct thread_data *td)
774{
254605cd 775 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
776 td->error = errno;
777 return 1;
778 }
779
43000118 780 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
781 return 0;
782}
783
2c83567e
JA
784static void cleanup_io_u(struct thread_data *td)
785{
786 struct list_head *entry, *n;
787 struct io_u *io_u;
788
789 list_for_each_safe(entry, n, &td->io_u_freelist) {
790 io_u = list_entry(entry, struct io_u, list);
791
792 list_del(&io_u->list);
2c83567e
JA
793 free(io_u);
794 }
6b71c826
JA
795
796 free(td->orig_buffer);
2c83567e
JA
797}
798
799static void init_io_u(struct thread_data *td)
800{
801 struct io_u *io_u;
802 int i, max_units;
6b71c826 803 char *p;
2c83567e
JA
804
805 if (!td->use_aio)
806 max_units = 1;
807 else
808 max_units = td->aio_depth;
809
6b71c826
JA
810 p = malloc(td->bs * max_units + MASK);
811 td->orig_buffer = ALIGN(p);
812
2c83567e
JA
813 INIT_LIST_HEAD(&td->io_u_freelist);
814 INIT_LIST_HEAD(&td->io_u_busylist);
815
6b71c826 816 p = td->orig_buffer;
2c83567e
JA
817 for (i = 0; i < max_units; i++) {
818 io_u = malloc(sizeof(*io_u));
819 memset(io_u, 0, sizeof(*io_u));
820 INIT_LIST_HEAD(&io_u->list);
821
6b71c826 822 io_u->buf = p + td->bs * i;
2c83567e
JA
823 io_u->buflen = td->bs;
824
825 list_add(&io_u->list, &td->io_u_freelist);
826 }
827}
828
a0a9b35b
JA
829static void setup_log(struct io_log **log)
830{
831 struct io_log *l = malloc(sizeof(*l));
832
833 l->nr_samples = 0;
834 l->max_samples = 1024;
835 l->log = malloc(l->max_samples * sizeof(struct io_sample));
836 *log = l;
837}
838
839static void finish_log(struct thread_data *td, struct io_log *log, char *name)
840{
841 char file_name[128];
842 FILE *f;
843 int i;
844
845 sprintf(file_name, "client%d_%s.log", td->thread_number, name);
846 f = fopen(file_name, "w");
847 if (!f) {
848 perror("fopen log");
849 return;
850 }
851
852 for (i = 0; i < log->nr_samples; i++)
853 fprintf(f, "%lu, %lu\n", log->log[i].time, log->log[i].val);
854
855 fclose(f);
856 free(log->log);
857 free(log);
858}
859
02983297
JA
860static int create_file(struct thread_data *td)
861{
862 unsigned int i;
863 char *b;
864
02983297
JA
865 /*
866 * unless specifically asked for overwrite, let normal io extend it
867 */
868 if (!td_read(td) && !td->overwrite)
869 return 0;
870
57d753e3
JA
871 if (!td->file_size) {
872 fprintf(stderr, "Need size for create\n");
873 td->error = EINVAL;
874 return 1;
875 }
876
02983297
JA
877 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
878 if (td->fd < 0) {
879 td->error = errno;
880 return 1;
881 }
882
883 td->blocks = td->file_size / td->bs;
884 b = malloc(td->bs);
885 memset(b, 0, td->bs);
886
887 for (i = 0; i < td->blocks; i++) {
888 int r = write(td->fd, b, td->bs);
889
890 if (r == td->bs)
891 continue;
892 else {
893 if (r < 0)
894 td->error = errno;
895 else
896 td->error = EIO;
897
898 break;
899 }
900 }
901
902 fsync(td->fd);
903 close(td->fd);
904 td->fd = -1;
905 free(b);
906 return 0;
907}
908
909static int file_exists(struct thread_data *td)
910{
911 struct stat st;
912
913 if (stat(td->file_name, &st) != -1)
914 return 1;
915
916 return errno != ENOENT;
917}
918
919static int setup_file(struct thread_data *td)
920{
921 struct stat st;
922 int flags = 0;
923
924 if (!file_exists(td)) {
925 if (!td->create_file) {
926 td->error = ENOENT;
927 return 1;
928 }
929 if (create_file(td))
930 return 1;
931 }
932
933 if (td->odirect)
934 flags |= O_DIRECT;
935
936 if (td_read(td))
937 td->fd = open(td->file_name, flags | O_RDONLY);
938 else {
939 if (!td->overwrite)
940 flags |= O_TRUNC;
74b4b5fb
JA
941 if (td->sync_io)
942 flags |= O_SYNC;
02983297
JA
943
944 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT, 0600);
945 }
946
947 if (td->fd == -1) {
948 td->error = errno;
949 return 1;
950 }
951
952 if (td_read(td)) {
953 if (fstat(td->fd, &st) == -1) {
954 td->error = errno;
955 return 1;
956 }
957
958 if (td->file_size > st.st_size)
959 st.st_size = td->file_size;
960 } else {
961 if (!td->file_size)
962 td->file_size = 1024 * 1024 * 1024;
963
964 st.st_size = td->file_size;
965 }
966
967 td->blocks = (st.st_size - td->file_offset) / td->bs;
968 if (!td->blocks) {
969 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
970 td->error = EINVAL;
971 return 1;
972 }
973
b95799ca
JA
974 if (td->invalidate_cache) {
975 if (fadvise(td->fd, 0, st.st_size, POSIX_FADV_DONTNEED) < 0) {
976 td->error = errno;
977 return 1;
978 }
979 }
980
02983297
JA
981 return 0;
982}
983
5c24b2c4 984static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
985{
986 struct thread_data *td;
02983297 987 int ret = 1;
2c83567e 988 void *data;
892199bd 989
7292613b
JA
990 setsid();
991
892199bd 992 data = shmat(shm_id, NULL, 0);
4ac89145
JA
993 if (data == (void *) -1) {
994 perror("shmat");
995 return NULL;
996 }
997
892199bd
JA
998 td = data + offset * sizeof(struct thread_data);
999 td->pid = getpid();
1000
2c83567e
JA
1001 init_io_u(td);
1002
18e0b78c
JA
1003 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1004 td->error = errno;
1005 goto err;
1006 }
1007
4240cfa1 1008 sprintf(argv[0], "fio%d", offset);
892199bd 1009
43000118
JA
1010 if (td->use_aio && init_aio(td))
1011 goto err;
1012
892199bd 1013 if (init_random_state(td))
599002b3 1014 goto err;
892199bd 1015
f737299d 1016 if (td->ioprio) {
892199bd
JA
1017 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1018 td->error = errno;
599002b3 1019 goto err;
892199bd
JA
1020 }
1021 }
1022
02983297
JA
1023 if (setup_file(td))
1024 goto err;
1025
892199bd
JA
1026 sem_post(&startup_sem);
1027 sem_wait(&td->mutex);
43000118 1028
7292613b
JA
1029 gettimeofday(&td->start, NULL);
1030
1031 if (td->ratemin)
1032 memcpy(&td->lastrate, &td->start, sizeof(td->start));
1033
fd1ae4c9
JA
1034 memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
1035
e4ed35c3 1036 if (!td->use_aio)
43000118 1037 do_sync_io(td);
e4ed35c3 1038 else
43000118 1039 do_async_io(td);
7292613b 1040
be33abe4 1041 td->runtime = mtime_since_now(&td->start);
892199bd 1042 ret = 0;
a0a9b35b
JA
1043
1044 if (td->bw_log)
1045 finish_log(td, td->bw_log, "bw");
1046 if (td->lat_log)
1047 finish_log(td, td->lat_log, "lat");
4ac89145 1048
892199bd 1049err:
7292613b
JA
1050 if (td->fd != -1) {
1051 close(td->fd);
1052 td->fd = -1;
1053 }
4ac89145
JA
1054 if (td->use_aio)
1055 cleanup_aio(td);
2c83567e 1056 cleanup_io_u(td);
599002b3 1057 if (ret) {
892199bd 1058 sem_post(&startup_sem);
599002b3
JA
1059 sem_wait(&td->mutex);
1060 }
02bdd9ba 1061 td->runstate = TD_EXITED;
4240cfa1 1062 shmdt(data);
892199bd
JA
1063 return NULL;
1064}
1065
5c24b2c4 1066static void free_shm(void)
892199bd 1067{
c269123b
JA
1068 struct shmid_ds sbuf;
1069
1070 if (threads) {
1071 shmdt(threads);
1072 threads = NULL;
1073 shmctl(shm_id, IPC_RMID, &sbuf);
1074 }
892199bd
JA
1075}
1076
57d753e3
JA
1077static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1078 double *mean, double *dev)
1079{
1080 double n;
1081
1082 if (is->samples == 0)
1083 return 0;
1084
1085 *min = is->min_val;
1086 *max = is->max_val;
1087
1088 n = (double) is->samples;
1089 *mean = (double) is->val / n;
1090 *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
1091 return 1;
1092}
1093
5c24b2c4 1094static void show_thread_status(struct thread_data *td)
892199bd
JA
1095{
1096 int prio, prio_class;
57d753e3
JA
1097 unsigned long min, max, bw = 0;
1098 double mean, dev;
892199bd 1099
213b446c
JA
1100 if (!td->io_blocks && !td->error)
1101 return;
1102
892199bd 1103 if (td->runtime)
4240cfa1 1104 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1105
1106 prio = td->ioprio & 0xff;
1107 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1108
57d753e3 1109 printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/s\n", td->thread_number, td->error, td->io_blocks * td->bs >> 20, bw);
fd1ae4c9 1110
57d753e3
JA
1111 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1112 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1113 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1114 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1115 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev))
1116 printf(" bw (KiB/s) : min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
892199bd
JA
1117}
1118
5c24b2c4 1119static int setup_rate(struct thread_data *td)
86184d14 1120{
4240cfa1
JA
1121 int nr_reads_per_sec;
1122
1123 if (!td->rate)
1124 return 0;
1125
1126 if (td->rate < td->ratemin) {
1127 fprintf(stderr, "min rate larger than nominal rate\n");
1128 return -1;
1129 }
86184d14 1130
4240cfa1 1131 nr_reads_per_sec = td->rate * 1024 / td->bs;
86184d14
JA
1132 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
1133 td->rate_pending_usleep = 0;
4240cfa1 1134 return 0;
86184d14
JA
1135}
1136
47d45203 1137static struct thread_data *get_new_job(int global)
892199bd 1138{
4240cfa1
JA
1139 struct thread_data *td;
1140
47d45203
JA
1141 if (global)
1142 return &def_thread;
8867c0a8 1143 if (thread_number >= max_jobs)
4240cfa1
JA
1144 return NULL;
1145
1146 td = &threads[thread_number++];
fc24389f 1147 memset(td, 0, sizeof(*td));
892199bd 1148
e4ed35c3 1149 td->fd = -1;
86184d14 1150 td->thread_number = thread_number;
76cb7b42 1151
47d45203 1152 td->ddir = def_thread.ddir;
76cb7b42
JA
1153 td->ioprio = def_thread.ioprio;
1154 td->sequential = def_thread.sequential;
47d45203
JA
1155 td->bs = def_thread.bs;
1156 td->odirect = def_thread.odirect;
76cb7b42
JA
1157 td->delay_sleep = def_thread.delay_sleep;
1158 td->fsync_blocks = def_thread.fsync_blocks;
1159 td->start_delay = def_thread.start_delay;
67903a2e 1160 td->timeout = def_thread.timeout;
76cb7b42 1161 td->use_aio = def_thread.use_aio;
02983297
JA
1162 td->create_file = def_thread.create_file;
1163 td->overwrite = def_thread.overwrite;
b95799ca 1164 td->invalidate_cache = def_thread.invalidate_cache;
76cb7b42 1165 td->file_size = def_thread.file_size;
9b5cf6c0 1166 td->file_offset = def_thread.file_offset;
76cb7b42
JA
1167 td->rate = def_thread.rate;
1168 td->ratemin = def_thread.ratemin;
1169 td->ratecycle = def_thread.ratecycle;
1170 td->aio_depth = def_thread.aio_depth;
47d45203 1171 memcpy(&td->cpumask, &def_thread.cpumask, sizeof(td->cpumask));
f737299d
JA
1172
1173 return td;
1174}
1175
4240cfa1
JA
1176static void put_job(struct thread_data *td)
1177{
1178 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
1179 thread_number--;
1180}
1181
5c24b2c4
JA
1182static int add_job(struct thread_data *td, const char *filename, int prioclass,
1183 int prio)
f737299d 1184{
47d45203
JA
1185 if (td == &def_thread)
1186 return 0;
1187
f737299d 1188 strcpy(td->file_name, filename);
4240cfa1 1189 sem_init(&td->mutex, 1, 0);
f737299d
JA
1190 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
1191
57d753e3
JA
1192 td->clat_stat.min_val = ULONG_MAX;
1193 td->slat_stat.min_val = ULONG_MAX;
1194 td->bw_stat.min_val = ULONG_MAX;
1195
8dbff0b1
JA
1196 run_str[td->thread_number - 1] = 'P';
1197
4ac89145
JA
1198 if (td->use_aio && !td->aio_depth)
1199 td->aio_depth = 1;
43000118 1200
4240cfa1
JA
1201 if (setup_rate(td))
1202 return -1;
f737299d 1203
a0a9b35b
JA
1204 if (write_lat_log)
1205 setup_log(&td->lat_log);
1206 if (write_bw_log)
1207 setup_log(&td->bw_log);
1208
e128065d 1209 printf("Client%d: file=%s, rw=%d, prio=%d/%d, seq=%d, odir=%d, bs=%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, filename, td->ddir, prioclass, prio, td->sequential, td->odirect, td->bs, td->rate, td->use_aio, td->aio_depth);
4240cfa1 1210 return 0;
892199bd
JA
1211}
1212
18e0b78c
JA
/*
 * Turn the bitmask 'cpu' into a cpu_set_t: the set is cleared and CPU n is
 * added for every bit n that is set in 'cpu'.
 *
 * The set is filled through a pointer. With the former by-value parameter
 * CPU_ZERO()/CPU_SET() only ever modified a local copy, so the caller's
 * mask was never changed.
 */
static void fill_cpu_mask_ptr(cpu_set_t *cpumask, unsigned int cpu)
{
	unsigned int i;

	CPU_ZERO(cpumask);

	for (i = 0; i < sizeof(cpu) * CHAR_BIT; i++) {
		/* 1U, shifting a signed 1 into the top bit is undefined */
		if ((1U << i) & cpu)
			CPU_SET(i, cpumask);
	}
}

/*
 * Callers keep passing the cpu_set_t object itself (it has to be an
 * lvalue), the macro hands its address on to the helper above.
 */
#define fill_cpu_mask(cpumask, cpu)	fill_cpu_mask_ptr(&(cpumask), (cpu))
1224
/*
 * Copy a single option value from input to output. The value ends at the
 * first ',' or '}' or at the end of the string; that terminator is not
 * copied. output is always NUL terminated. No bounds are checked, output
 * has to be big enough for the value.
 */
static void fill_option(const char *input, char *output)
{
	const char *src = input;
	char *dst = output;

	for (; *src != '\0' && *src != ',' && *src != '}'; src++)
		*dst++ = *src;

	*dst = '\0';
}
1237
02983297
JA
/*
 * convert string after '=' into decimal value, noting any size suffix
 *
 * p is searched for its first '='. The decimal number that follows is
 * scaled by 1024 for a 'k'/'K' suffix, by 1024^2 for 'm'/'M' and by 1024^3
 * for 'g'/'G', and the result is stored in *val. The suffix is the
 * character right behind the digits, so it is found no matter whether the
 * value is followed by a newline, by another option or by nothing.
 *
 * Returns 0 on success. Returns 1 and leaves *val alone if p holds no '=',
 * no digits follow the '=', or the value does not fit into *val.
 */
static int str_cnv(char *p, unsigned long long *val)
{
	unsigned long long num, mult;
	char *str, *end;

	str = strchr(p, '=');
	if (!str)
		return 1;

	str++;

	/* strtoull() only ever sets errno, it has to be cleared up front */
	errno = 0;
	num = strtoull(str, &end, 10);
	if (end == str || errno == ERANGE)
		return 1;

	switch (*end) {
	case 'k':
	case 'K':
		mult = 1024ULL;
		break;
	case 'm':
	case 'M':
		mult = 1024ULL * 1024;
		break;
	case 'g':
	case 'G':
		mult = 1024ULL * 1024 * 1024;
		break;
	default:
		mult = 1;
		break;
	}

	if (num > ULLONG_MAX / mult)
		return 1;

	*val = num * mult;
	return 0;
}
1278
892199bd
JA
1279/*
1280 * job key words:
1281 *
1282 * file=
1283 * bs=
1284 * rw=
1285 * direct=
1286 */
5c24b2c4 1287static void parse_jobs_cmd(int argc, char *argv[], int index)
892199bd 1288{
f737299d
JA
1289 struct thread_data *td;
1290 unsigned int prio, prioclass, cpu;
892199bd
JA
1291 char *string, *filename, *p, *c;
1292 int i;
1293
1294 string = malloc(256);
1295 filename = malloc(256);
1296
1297 for (i = index; i < argc; i++) {
1298 p = argv[i];
1299
1300 c = strpbrk(p, "{");
1301 if (!c)
1302 break;
1303
1304 filename[0] = 0;
4240cfa1 1305
47d45203 1306 td = get_new_job(0);
4240cfa1
JA
1307 if (!td)
1308 break;
f737299d 1309
892199bd 1310 prioclass = 2;
f737299d 1311 prio = 4;
892199bd
JA
1312
1313 c = strstr(p, "rw=");
1314 if (c) {
1315 c += 3;
1316 if (*c == '0')
f737299d 1317 td->ddir = DDIR_READ;
892199bd 1318 else
f737299d 1319 td->ddir = DDIR_WRITE;
892199bd
JA
1320 }
1321
1322 c = strstr(p, "prio=");
1323 if (c) {
1324 c += 5;
1325 prio = *c - '0';
1326 }
1327
1328 c = strstr(p, "prioclass=");
1329 if (c) {
1330 c += 10;
1331 prioclass = *c - '0';
1332 }
1333
1334 c = strstr(p, "file=");
1335 if (c) {
1336 c += 5;
1337 fill_option(c, filename);
1338 }
1339
1340 c = strstr(p, "bs=");
1341 if (c) {
1342 c += 3;
1343 fill_option(c, string);
f737299d
JA
1344 td->bs = strtoul(string, NULL, 10);
1345 td->bs <<= 10;
892199bd
JA
1346 }
1347
1348 c = strstr(p, "direct=");
1349 if (c) {
1350 c += 7;
1351 if (*c != '0')
f737299d 1352 td->odirect = 1;
892199bd 1353 else
f737299d 1354 td->odirect = 0;
892199bd
JA
1355 }
1356
74b4b5fb
JA
1357 c = strstr(p, "sync=");
1358 if (c) {
1359 c += 5;
1360 if (*c != '0')
1361 td->sync_io = 1;
1362 else
1363 td->sync_io = 0;
1364 }
1365
892199bd
JA
1366 c = strstr(p, "delay=");
1367 if (c) {
1368 c += 6;
1369 fill_option(c, string);
f737299d 1370 td->delay_sleep = strtoul(string, NULL, 10);
892199bd
JA
1371 }
1372
86184d14
JA
1373 c = strstr(p, "rate=");
1374 if (c) {
1375 c += 5;
1376 fill_option(c, string);
f737299d 1377 td->rate = strtoul(string, NULL, 10);
86184d14
JA
1378 }
1379
4240cfa1
JA
1380 c = strstr(p, "ratemin=");
1381 if (c) {
1382 c += 8;
1383 fill_option(c, string);
1384 td->ratemin = strtoul(string, NULL, 10);
1385 }
1386
1387 c = strstr(p, "ratecycle=");
1388 if (c) {
1389 c += 10;
1390 fill_option(c, string);
1391 td->ratecycle = strtoul(string, NULL, 10);
1392 }
1393
18e0b78c
JA
1394 c = strstr(p, "cpumask=");
1395 if (c) {
1396 c += 8;
1397 fill_option(c, string);
1398 cpu = strtoul(string, NULL, 10);
f737299d 1399 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1400 }
1401
4240cfa1
JA
1402 c = strstr(p, "fsync=");
1403 if (c) {
1404 c += 6;
1405 fill_option(c, string);
1406 td->fsync_blocks = strtoul(string, NULL, 10);
1407 }
18e0b78c 1408
fc24389f
JA
1409 c = strstr(p, "startdelay=");
1410 if (c) {
1411 c += 11;
1412 fill_option(c, string);
1413 td->start_delay = strtoul(string, NULL, 10);
1414 }
1415
67903a2e
JA
1416 c = strstr(p, "timeout=");
1417 if (c) {
1418 c += 8;
1419 fill_option(c, string);
1420 td->timeout = strtoul(string, NULL, 10);
1421 }
1422
b95799ca
JA
1423 c = strstr(p, "invalidate=");
1424 if (c) {
1425 c += 11;
1426 if (*c != '0')
1427 td->invalidate_cache = 1;
1428 else
1429 td->invalidate_cache = 0;
1430 }
1431
02983297
JA
1432 c = strstr(p, "size=");
1433 if (c) {
1434 c += 5;
1435 str_cnv(c, &td->file_size);
1436 }
1437
1438 c = strstr(p, "offset=");
1439 if (c) {
1440 c += 7;
1441 str_cnv(c, &td->file_offset);
1442 }
1443
43000118
JA
1444 c = strstr(p, "aio_depth=");
1445 if (c) {
1446 c += 10;
1447 fill_option(c, string);
1448 td->aio_depth = strtoul(string, NULL, 10);
1449 }
1450
1451 c = strstr(p, "aio");
1452 if (c)
1453 td->use_aio = 1;
1454
02983297
JA
1455 c = strstr(p, "create");
1456 if (c)
1457 td->create_file = 1;
1458
1459 c = strstr(p, "overwrite");
1460 if (c)
1461 td->overwrite = 1;
1462
892199bd
JA
1463 c = strstr(p, "random");
1464 if (c)
f737299d 1465 td->sequential = 0;
892199bd
JA
1466 c = strstr(p, "sequential");
1467 if (c)
f737299d 1468 td->sequential = 1;
892199bd 1469
4240cfa1
JA
1470 if (add_job(td, filename, prioclass, prio))
1471 put_job(td);
892199bd
JA
1472 }
1473
7dd1389e
JA
1474 free(string);
1475 free(filename);
892199bd
JA
1476}
1477
02983297
JA
/*
 * Size flavoured option check: if 'name' occurs anywhere in line p, let
 * str_cnv() convert the value behind the '=' into *val.
 *
 * Returns 0 if a value was converted, 1 if name is not in p or str_cnv()
 * reports a failure.
 */
static int check_strcnv(char *p, char *name, unsigned long long *val)
{
	return strstr(p, name) ? str_cnv(p, val) : 1;
}
1485
/*
 * Match an integer option at the very start of line p, written either as
 * "name=value" or "name = value", and store the value in *val.
 *
 * Returns 0 if the option matched, 1 otherwise (*val is then untouched).
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char fmt[128];
	int len;

	/*
	 * White space in a scanf format matches any amount of white space,
	 * none included, so this one format covers both spellings. %u is
	 * the conversion that goes with an unsigned int.
	 */
	len = snprintf(fmt, sizeof(fmt), "%s = %%u", name);
	if (len < 0 || (size_t) len >= sizeof(fmt))
		return 1;

	if (sscanf(p, fmt, val) == 1)
		return 0;

	return 1;
}
1500
/*
 * Tell whether a job file line carries nothing to parse.
 *
 * Returns 1 if the line is empty, holds only white space and control
 * characters, or if the first other character is ';' (a comment).
 * Returns 0 as soon as any other character is seen first.
 */
static int is_empty_or_comment(char *line)
{
	const unsigned char *c;

	/*
	 * Walk up to the terminator rather than calling strlen() on every
	 * round, and look at the bytes as unsigned char: the ctype
	 * functions are undefined for negative char values.
	 */
	for (c = (const unsigned char *) line; *c != '\0'; c++) {
		if (*c == ';')
			return 1;
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
1514
5c24b2c4 1515static int parse_jobs_ini(char *file)
7dd1389e 1516{
47d45203 1517 unsigned int prioclass, prio, cpu, global;
f737299d 1518 struct thread_data *td;
7dd1389e
JA
1519 char *string, *name;
1520 fpos_t off;
1521 FILE *f;
1522 char *p;
1523
1524 f = fopen(file, "r");
1525 if (!f) {
1526 perror("fopen");
4240cfa1 1527 return 1;
7dd1389e
JA
1528 }
1529
1530 string = malloc(4096);
1531 name = malloc(256);
1532
7dd1389e 1533 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b
JA
1534 if (is_empty_or_comment(p))
1535 continue;
7dd1389e
JA
1536 if (sscanf(p, "[%s]", name) != 1)
1537 continue;
1538
47d45203
JA
1539 global = !strncmp(name, "global", 6);
1540
7dd1389e
JA
1541 name[strlen(name) - 1] = '\0';
1542
47d45203 1543 td = get_new_job(global);
4240cfa1
JA
1544 if (!td)
1545 break;
f737299d 1546
7dd1389e 1547 prioclass = 2;
f737299d 1548 prio = 4;
7dd1389e
JA
1549
1550 fgetpos(f, &off);
1551 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b 1552 if (is_empty_or_comment(p))
e6402082
JA
1553 continue;
1554 if (strstr(p, "["))
7dd1389e 1555 break;
f737299d
JA
1556 if (!check_int(p, "bs", &td->bs)) {
1557 td->bs <<= 10;
7dd1389e
JA
1558 fgetpos(f, &off);
1559 continue;
1560 }
f737299d 1561 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
1562 fgetpos(f, &off);
1563 continue;
1564 }
1565 if (!check_int(p, "prio", &prio)) {
1566 fgetpos(f, &off);
1567 continue;
1568 }
1569 if (!check_int(p, "prioclass", &prioclass)) {
1570 fgetpos(f, &off);
1571 continue;
1572 }
f737299d 1573 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
1574 fgetpos(f, &off);
1575 continue;
1576 }
f737299d 1577 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
1578 fgetpos(f, &off);
1579 continue;
1580 }
4240cfa1
JA
1581 if (!check_int(p, "ratemin", &td->ratemin)) {
1582 fgetpos(f, &off);
1583 continue;
1584 }
1585 if (!check_int(p, "ratecycle", &td->ratecycle)) {
1586 fgetpos(f, &off);
1587 continue;
1588 }
f737299d 1589 if (!check_int(p, "delay", &td->delay_sleep)) {
7dd1389e
JA
1590 fgetpos(f, &off);
1591 continue;
1592 }
18e0b78c 1593 if (!check_int(p, "cpumask", &cpu)) {
f737299d 1594 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1595 fgetpos(f, &off);
1596 continue;
1597 }
4240cfa1
JA
1598 if (!check_int(p, "fsync", &td->fsync_blocks)) {
1599 fgetpos(f, &off);
1600 continue;
1601 }
fc24389f
JA
1602 if (!check_int(p, "startdelay", &td->start_delay)) {
1603 fgetpos(f, &off);
1604 continue;
1605 }
67903a2e
JA
1606 if (!check_int(p, "timeout", &td->timeout)) {
1607 fgetpos(f, &off);
1608 continue;
1609 }
b95799ca
JA
1610 if (!check_int(p, "invalidate",&td->invalidate_cache)) {
1611 fgetpos(f, &off);
1612 continue;
1613 }
43000118
JA
1614 if (!check_int(p, "aio_depth", &td->aio_depth)) {
1615 fgetpos(f, &off);
1616 continue;
1617 }
74b4b5fb
JA
1618 if (!check_int(p, "sync", &td->sync_io)) {
1619 fgetpos(f, &off);
1620 continue;
1621 }
02983297
JA
1622 if (!check_strcnv(p, "size", &td->file_size)) {
1623 fgetpos(f, &off);
1624 continue;
1625 }
1626 if (!check_strcnv(p, "offset", &td->file_offset)) {
1627 fgetpos(f, &off);
1628 continue;
1629 }
43000118 1630 if (!strncmp(p, "sequential", 10)) {
f737299d 1631 td->sequential = 1;
7dd1389e
JA
1632 fgetpos(f, &off);
1633 continue;
1634 }
43000118 1635 if (!strncmp(p, "random", 6)) {
f737299d 1636 td->sequential = 0;
7dd1389e
JA
1637 fgetpos(f, &off);
1638 continue;
1639 }
43000118
JA
1640 if (!strncmp(p, "aio", 3)) {
1641 td->use_aio = 1;
1642 fgetpos(f, &off);
1643 continue;
1644 }
02983297
JA
1645 if (!strncmp(p, "create", 6)) {
1646 td->create_file = 1;
1647 fgetpos(f, &off);
1648 continue;
1649 }
1650 if (!strncmp(p, "overwrite", 9)) {
1651 td->overwrite = 1;
1652 fgetpos(f, &off);
1653 continue;
1654 }
e6402082 1655 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
1656 }
1657 fsetpos(f, &off);
1658
4240cfa1
JA
1659 if (add_job(td, name, prioclass, prio))
1660 put_job(td);
7dd1389e
JA
1661 }
1662
1663 free(string);
1664 free(name);
fc7d63df 1665 fclose(f);
4240cfa1 1666 return 0;
7dd1389e
JA
1667}
1668
5c24b2c4 1669static int parse_options(int argc, char *argv[])
892199bd 1670{
01c4d8de 1671 int i;
892199bd
JA
1672
1673 for (i = 1; i < argc; i++) {
1674 char *parm = argv[i];
1675
1676 if (parm[0] != '-')
1677 break;
1678
1679 parm++;
1680 switch (*parm) {
1681 case 's':
1682 parm++;
47d45203 1683 def_thread.sequential = !!atoi(parm);
892199bd
JA
1684 break;
1685 case 'b':
1686 parm++;
47d45203
JA
1687 def_thread.bs = atoi(parm);
1688 def_thread.bs <<= 10;
1689 if (!def_thread.bs) {
4240cfa1 1690 printf("bad block size\n");
47d45203 1691 def_thread.bs = DEF_BS;
4240cfa1 1692 }
892199bd
JA
1693 break;
1694 case 't':
1695 parm++;
47d45203 1696 def_thread.timeout = atoi(parm);
892199bd 1697 break;
892199bd
JA
1698 case 'r':
1699 parm++;
1700 repeatable = !!atoi(parm);
1701 break;
02bdd9ba
JA
1702 case 'R':
1703 parm++;
1704 rate_quit = !!atoi(parm);
1705 break;
892199bd
JA
1706 case 'o':
1707 parm++;
47d45203 1708 def_thread.odirect = !!atoi(parm);
892199bd 1709 break;
7dd1389e
JA
1710 case 'f':
1711 if (i + 1 >= argc) {
1712 printf("-f needs file as arg\n");
1713 break;
1714 }
1715 ini_file = strdup(argv[i+1]);
a642279f 1716 i++;
7dd1389e 1717 break;
a0a9b35b
JA
1718 case 'l':
1719 write_lat_log = 1;
1720 break;
1721 case 'w':
1722 write_bw_log = 1;
1723 break;
892199bd 1724 default:
7dd1389e 1725 printf("bad option %s\n", argv[i]);
892199bd
JA
1726 break;
1727 }
1728 }
1729
892199bd
JA
1730 return i;
1731}
1732
3f39453a 1733static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 1734 int t_rate, int m_rate)
3f39453a 1735{
3f39453a
JA
1736 printf("Threads now running: %d", nr_running);
1737 if (m_rate || t_rate)
1738 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
1739 printf(" : [%s]\r", run_str);
1740 fflush(stdout);
3f39453a
JA
1741}
1742
213b446c 1743static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1744{
213b446c 1745 int i;
02bdd9ba 1746
3f39453a
JA
1747 /*
1748 * reap exited threads (TD_EXITED -> TD_REAPED)
1749 */
02bdd9ba
JA
1750 for (i = 0; i < thread_number; i++) {
1751 struct thread_data *td = &threads[i];
1752
213b446c
JA
1753 if (td->runstate != TD_EXITED)
1754 continue;
02bdd9ba 1755
213b446c 1756 td->runstate = TD_REAPED;
8dbff0b1 1757 run_str[td->thread_number - 1] = '_';
213b446c
JA
1758 waitpid(td->pid, NULL, 0);
1759 (*nr_running)--;
1760 (*m_rate) -= td->ratemin;
1761 (*t_rate) -= td->rate;
e6402082
JA
1762
1763 if (td->terminate)
1764 continue;
1765
8dbff0b1 1766 print_thread_status(td, *nr_running, *t_rate, *m_rate);
213b446c 1767 }
02bdd9ba
JA
1768}
1769
fc24389f
JA
1770static void run_threads(char *argv[])
1771{
be33abe4 1772 struct timeval genesis;
fc24389f
JA
1773 struct thread_data *td;
1774 unsigned long spent;
213b446c 1775 int i, todo, nr_running, m_rate, t_rate;
fc24389f
JA
1776
1777 gettimeofday(&genesis, NULL);
1778
1779 printf("Starting %d threads\n", thread_number);
1780 fflush(stdout);
1781
7292613b
JA
1782 signal(SIGINT, sig_handler);
1783
fc24389f 1784 todo = thread_number;
02bdd9ba 1785 nr_running = 0;
213b446c 1786 m_rate = t_rate = 0;
fc24389f 1787
213b446c 1788 while (todo) {
3f39453a
JA
1789 /*
1790 * create threads (TD_NOT_CREATED -> TD_CREATED)
1791 */
fc24389f
JA
1792 for (i = 0; i < thread_number; i++) {
1793 td = &threads[i];
1794
02bdd9ba 1795 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1796 continue;
1797
213b446c
JA
1798 /*
1799 * never got a chance to start, killed by other
1800 * thread for some reason
1801 */
1802 if (td->terminate) {
1803 todo--;
1804 continue;
1805 }
1806
fc24389f 1807 if (td->start_delay) {
be33abe4 1808 spent = mtime_since_now(&genesis);
fc24389f
JA
1809
1810 if (td->start_delay * 1000 > spent)
1811 continue;
1812 }
1813
02bdd9ba 1814 td->runstate = TD_CREATED;
8dbff0b1 1815 run_str[td->thread_number - 1] = 'C';
fc24389f
JA
1816 sem_init(&startup_sem, 1, 1);
1817 todo--;
1818
1819 if (fork())
1820 sem_wait(&startup_sem);
1821 else {
1822 thread_main(shm_id, i, argv);
1823 exit(0);
1824 }
1825 }
1826
3f39453a
JA
1827 /*
1828 * start created threads (TD_CREATED -> TD_STARTED)
1829 */
fc24389f
JA
1830 for (i = 0; i < thread_number; i++) {
1831 struct thread_data *td = &threads[i];
1832
3f39453a
JA
1833 if (td->runstate != TD_CREATED)
1834 continue;
1835
1836 td->runstate = TD_STARTED;
8dbff0b1 1837 run_str[td->thread_number - 1] = '+';
3f39453a
JA
1838 nr_running++;
1839 m_rate += td->ratemin;
1840 t_rate += td->rate;
1841 sem_post(&td->mutex);
1842
8dbff0b1 1843 print_thread_status(td, nr_running, t_rate, m_rate);
fc24389f
JA
1844 }
1845
213b446c 1846 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1847
fc24389f
JA
1848 if (todo)
1849 usleep(100000);
1850 }
02bdd9ba
JA
1851
1852 while (nr_running) {
213b446c 1853 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1854 usleep(10000);
1855 }
fc24389f
JA
1856}
1857
8867c0a8 1858int setup_thread_area(void)
892199bd 1859{
8867c0a8
JA
1860 /*
1861 * 1024 is too much on some machines, scale max_jobs if
1862 * we get a failure that looks like too large a shm segment
1863 */
1864 do {
1865 int s = max_jobs * sizeof(struct thread_data);
18e0b78c 1866
8867c0a8
JA
1867 shm_id = shmget(0, s, IPC_CREAT | 0600);
1868 if (shm_id != -1)
1869 break;
1870 if (errno != EINVAL) {
1871 perror("shmget");
1872 break;
1873 }
1874
1875 max_jobs >>= 1;
d4fac444 1876 } while (max_jobs);
8867c0a8
JA
1877
1878 if (shm_id == -1)
892199bd 1879 return 1;
892199bd
JA
1880
1881 threads = shmat(shm_id, NULL, 0);
8867c0a8 1882 if (threads == (void *) -1) {
86184d14
JA
1883 perror("shmat");
1884 return 1;
1885 }
892199bd
JA
1886
1887 atexit(free_shm);
8867c0a8
JA
1888 return 0;
1889}
1890
1891int main(int argc, char *argv[])
1892{
1893 static unsigned long max_run[2], min_run[2], total_blocks[2];
57d753e3 1894 static unsigned long max_bw[2], min_bw[2];
8867c0a8
JA
1895 static unsigned long read_mb, write_mb, read_agg, write_agg;
1896 int i;
1897
1898 if (setup_thread_area())
1899 return 1;
892199bd 1900
47d45203 1901 if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &def_thread.cpumask) == -1) {
4240cfa1
JA
1902 perror("sched_getaffinity");
1903 return 1;
1904 }
1905
47d45203
JA
1906 /*
1907 * fill globals
1908 */
1909 def_thread.ddir = DDIR_READ;
1910 def_thread.bs = DEF_BS;
02983297 1911 def_thread.odirect = DEF_ODIRECT;
47d45203 1912 def_thread.ratecycle = DEF_RATE_CYCLE;
02983297 1913 def_thread.sequential = DEF_SEQUENTIAL;
47d45203 1914 def_thread.timeout = DEF_TIMEOUT;
02983297
JA
1915 def_thread.create_file = DEF_CREATE;
1916 def_thread.overwrite = DEF_OVERWRITE;
b95799ca 1917 def_thread.invalidate_cache = DEF_INVALIDATE;
47d45203 1918
892199bd 1919 i = parse_options(argc, argv);
7dd1389e 1920
4240cfa1
JA
1921 if (ini_file) {
1922 if (parse_jobs_ini(ini_file))
1923 return 1;
1924 } else
1925 parse_jobs_cmd(argc, argv, i);
7dd1389e 1926
4240cfa1
JA
1927 if (!thread_number) {
1928 printf("Nothing to do\n");
1929 return 1;
1930 }
7dd1389e 1931
fc24389f 1932 run_threads(argv);
892199bd 1933
892199bd
JA
1934 min_bw[0] = min_run[0] = ~0UL;
1935 min_bw[1] = min_run[1] = ~0UL;
892199bd
JA
1936 for (i = 0; i < thread_number; i++) {
1937 struct thread_data *td = &threads[i];
1938 unsigned long bw = 0;
1939
1940 if (td->error)
7dd1389e 1941 goto show_stat;
892199bd
JA
1942
1943 if (td->runtime < min_run[td->ddir])
1944 min_run[td->ddir] = td->runtime;
1945 if (td->runtime > max_run[td->ddir])
1946 max_run[td->ddir] = td->runtime;
1947
1948 if (td->runtime)
4240cfa1 1949 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1950 if (bw < min_bw[td->ddir])
1951 min_bw[td->ddir] = bw;
1952 if (bw > max_bw[td->ddir])
1953 max_bw[td->ddir] = bw;
892199bd 1954
4240cfa1 1955 total_blocks[td->ddir] += td->io_blocks;
892199bd 1956
02983297 1957 if (td_read(td)) {
4240cfa1 1958 read_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1959 if (td->runtime)
4240cfa1 1960 read_agg += (td->io_blocks * td->bs) / td->runtime;
02983297 1961 } else {
4240cfa1 1962 write_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1963 if (td->runtime)
4240cfa1 1964 write_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1965 }
1966
7dd1389e 1967show_stat:
892199bd
JA
1968 show_thread_status(td);
1969 }
1970
57d753e3 1971 printf("\nRun status:\n");
892199bd 1972 if (max_run[DDIR_READ])
57d753e3 1973 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, min_bw[0], max_bw[0], min_run[0], max_run[0]);
892199bd 1974 if (max_run[DDIR_WRITE])
57d753e3 1975 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 1976
892199bd
JA
1977 return 0;
1978}