[PATCH] fio: Add option 'invalidate' for killing page cache of file
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
e128065d 32#include <math.h>
02983297 33#include <limits.h>
892199bd
JA
34#include <sys/time.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/wait.h>
38#include <semaphore.h>
39#include <sys/ipc.h>
40#include <sys/shm.h>
41#include <asm/unistd.h>
42
4240cfa1
JA
43#define MAX_JOBS (1024)
44
892199bd
JA
45/*
46 * assume we don't have _get either, if _set isn't defined
47 */
48#ifndef __NR_ioprio_set
892199bd
JA
49#if defined(__i386__)
50#define __NR_ioprio_set 289
51#define __NR_ioprio_get 290
52#elif defined(__powerpc__) || defined(__powerpc64__)
53#define __NR_ioprio_set 273
54#define __NR_ioprio_get 274
55#elif defined(__x86_64__)
56#define __NR_ioprio_set 251
57#define __NR_ioprio_get 252
58#elif defined(__ia64__)
59#define __NR_ioprio_set 1274
60#define __NR_ioprio_get 1275
61#elif defined(__alpha__)
62#define __NR_ioprio_set 442
63#define __NR_ioprio_get 443
64#elif defined(__s390x__) || defined(__s390__)
65#define __NR_ioprio_set 282
66#define __NR_ioprio_get 283
67#else
68#error "Unsupported arch"
69#endif
b95799ca 70#endif
892199bd 71
b95799ca
JA
72#ifndef __NR_fadvise64
73#if defined(__i386__)
74#define __NR_fadvise64 250
75#elif defined(__powerpc__) || defined(__powerpc64__)
76#define __NR_fadvise64 233
77#elif defined(__x86_64__)
78#define __NR_fadvise64 221
79#elif defined(__ia64__)
80#define __NR_fadvise64 1234
81#elif defined(__alpha__)
82#define __NR_fadvise64 413
83#elif defined(__s390x__) || defined(__s390__)
84#define __NR_fadvise64 253
85#else
86#error "Unsupported arch"
87#endif
892199bd
JA
88#endif
89
/*
 * Issue the raw ioprio_set system call. The result of syscall() is handed
 * back to the caller unchanged.
 */
static int ioprio_set(int which, int who, int ioprio)
{
	const long nr = __NR_ioprio_set;

	return syscall(nr, which, who, ioprio);
}
94
b95799ca
JA
/*
 * Give the kernel advice about the byte range [offset, offset + len) of fd.
 * The raw fadvise64 syscall variant is kept for reference but compiled out;
 * the libc posix_fadvise() wrapper is what actually runs, and its return
 * value is passed straight through.
 */
static int fadvise(int fd, loff_t offset, size_t len, int advice)
{
#if 1
	return posix_fadvise(fd, (off_t) offset, len, advice);
#else
	return syscall(__NR_fadvise64, fd, offset, offset >> 32, len, advice);
#endif
}
106
892199bd
JA
107enum {
108 IOPRIO_WHO_PROCESS = 1,
109 IOPRIO_WHO_PGRP,
110 IOPRIO_WHO_USER,
111};
112
113#define IOPRIO_CLASS_SHIFT 13
114
892199bd
JA
/* low bits cleared by ALIGN(): buffers are aligned to MASK + 1 (4096) bytes */
#define MASK	(4095)

/* defaults applied to a job unless the job description overrides them */
#define DEF_BS		(4096)
#define DEF_TIMEOUT	(30)
#define DEF_RATE_CYCLE	(1000)
#define DEF_ODIRECT	(1)
#define DEF_SEQUENTIAL	(1)
#define DEF_RAND_REPEAT	(1)
#define DEF_OVERWRITE	(0)
#define DEF_CREATE	(1)
#define DEF_INVALIDATE	(1)

/*
 * Round 'buf' up to the next MASK + 1 byte boundary. The caller must have
 * allocated at least MASK spare bytes. The whole expansion is parenthesised
 * so the macro can be used inside larger expressions, e.g. ALIGN(p)[0].
 */
#define ALIGN(buf)	((char *) (((unsigned long) (buf) + MASK) & ~(MASK)))
892199bd 128
4240cfa1 129static int repeatable = DEF_RAND_REPEAT;
02bdd9ba 130static int rate_quit = 1;
892199bd 131
892199bd 132static int thread_number;
7dd1389e 133static char *ini_file;
892199bd 134
8867c0a8
JA
135static int max_jobs = MAX_JOBS;
136
892199bd
JA
137static int shm_id;
138
4240cfa1
JA
139enum {
140 DDIR_READ = 0,
141 DDIR_WRITE,
142};
892199bd 143
02bdd9ba
JA
144/*
145 * thread life cycle
146 */
147enum {
148 TD_NOT_CREATED = 0,
149 TD_CREATED,
150 TD_STARTED,
151 TD_EXITED,
152 TD_REAPED,
153};
154
02983297
JA
155#define td_read(td) ((td)->ddir == DDIR_READ)
156#define should_fsync(td) (!td_read(td) && !(td)->odirect)
157
892199bd
JA
158struct thread_data {
159 char file_name[256];
160 int thread_number;
161 int error;
162 int fd;
892199bd 163 pid_t pid;
7292613b 164 char *buf;
4240cfa1 165 volatile int terminate;
02bdd9ba 166 volatile int runstate;
f737299d
JA
167 unsigned int ddir;
168 unsigned int ioprio;
169 unsigned int sequential;
170 unsigned int bs;
171 unsigned int odirect;
172 unsigned int delay_sleep;
4240cfa1 173 unsigned int fsync_blocks;
fc24389f 174 unsigned int start_delay;
47d45203 175 unsigned int timeout;
43000118 176 unsigned int use_aio;
02983297
JA
177 unsigned int create_file;
178 unsigned int overwrite;
b95799ca 179 unsigned int invalidate_cache;
02983297
JA
180 unsigned long long file_size;
181 unsigned long long file_offset;
18e0b78c 182 cpu_set_t cpumask;
86184d14 183
43000118
JA
184 io_context_t *aio_ctx;
185 struct iocb *aio_iocbs;
186 unsigned int aio_depth;
187 unsigned int aio_cur_depth;
188 struct io_event *aio_events;
189 char *aio_iocbs_status;
190
7dd1389e 191 unsigned int rate;
4240cfa1
JA
192 unsigned int ratemin;
193 unsigned int ratecycle;
194 unsigned long rate_usec_cycle;
195 long rate_pending_usleep;
196 unsigned long rate_blocks;
197 struct timeval lastrate;
86184d14 198
892199bd
JA
199 unsigned long max_latency; /* msec */
200 unsigned long min_latency; /* msec */
201 unsigned long runtime; /* sec */
202 unsigned long blocks;
4240cfa1 203 unsigned long io_blocks;
892199bd
JA
204 unsigned long last_block;
205 sem_t mutex;
892199bd
JA
206 struct drand48_data random_state;
207
208 /*
e128065d 209 * bandwidth and latency stats
892199bd
JA
210 */
211 unsigned long stat_time;
e128065d 212 unsigned long stat_time_sq;
fd1ae4c9
JA
213 unsigned long stat_time_samples;
214 unsigned long stat_io_blocks;
215 unsigned long stat_bw;
216 unsigned long stat_bw_sq;
217 unsigned long stat_bw_samples;
218 struct timeval stat_sample_time;
4240cfa1
JA
219
220 struct timeval start;
892199bd
JA
221};
222
223static struct thread_data *threads;
47d45203 224static struct thread_data def_thread;
892199bd
JA
225
226static sem_t startup_sem;
227
5c24b2c4 228static void sig_handler(int sig)
892199bd
JA
229{
230 int i;
231
213b446c
JA
232 for (i = 0; i < thread_number; i++) {
233 struct thread_data *td = &threads[i];
234
235 td->terminate = 1;
236 td->start_delay = 0;
237 }
02bdd9ba
JA
238}
239
5c24b2c4 240static int init_random_state(struct thread_data *td)
892199bd
JA
241{
242 unsigned long seed = 123;
243
244 if (td->sequential)
245 return 0;
246
247 if (!repeatable) {
248 int fd = open("/dev/random", O_RDONLY);
249
250 if (fd == -1) {
251 td->error = errno;
252 return 1;
253 }
254
7dd1389e 255 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
892199bd
JA
256 td->error = EIO;
257 close(fd);
258 return 1;
259 }
260
261 close(fd);
262 }
263
264 srand48_r(seed, &td->random_state);
265 return 0;
266}
267
/*
 * Microseconds elapsed from *s to *e.
 *
 * Computed in integer arithmetic. If *e lies before *s (for instance
 * because the wall clock was stepped back), 0 is returned rather than
 * converting a negative value to an unsigned type.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - (long long) s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) usec;
}
283
/*
 * Milliseconds elapsed from *s to *e, truncated towards zero.
 *
 * Computed in integer arithmetic. If *e lies before *s the result is 0
 * rather than a negative value converted to an unsigned type.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - (long long) s->tv_usec;

	if (usec < 0)
		return 0;

	return (unsigned long) (usec / 1000);
}
300
be33abe4
JA
/*
 * Milliseconds elapsed between *s and the current time of day.
 */
static unsigned long mtime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return mtime_since(s, &now);
}
308
98168d55
JA
/*
 * Express the absolute time in *s as a millisecond count.
 */
static inline unsigned long msec_now(struct timeval *s)
{
	unsigned long from_sec = s->tv_sec * 1000;
	unsigned long from_usec = s->tv_usec / 1000;

	return from_sec + from_usec;
}
313
5c24b2c4 314static unsigned long get_next_offset(struct thread_data *td)
892199bd
JA
315{
316 unsigned long b;
317 long r;
318
319 if (!td->sequential) {
320 lrand48_r(&td->random_state, &r);
321 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
322 } else {
323 b = td->last_block;
324 td->last_block++;
325 }
326
02983297 327 return b * td->bs + td->file_offset;
892199bd
JA
328}
329
5c24b2c4 330static void add_stat_sample(struct thread_data *td, unsigned long msec)
892199bd 331{
fd1ae4c9
JA
332 unsigned long spent;
333
892199bd 334 td->stat_time += msec;
e128065d 335 td->stat_time_sq += msec * msec;
fd1ae4c9
JA
336 td->stat_time_samples++;
337
338 spent = mtime_since_now(&td->stat_sample_time);
339 if (spent >= 500) {
340 unsigned long rate = ((td->io_blocks - td->stat_io_blocks) * td->bs) / spent;
341
342 td->stat_bw += rate;
343 td->stat_bw_sq += rate * rate;
344 gettimeofday(&td->stat_sample_time, NULL);
345 td->stat_io_blocks = td->io_blocks;
346 td->stat_bw_samples++;
347 }
892199bd
JA
348}
349
/*
 * Sleep for 'usec' microseconds, resuming after signal interruptions.
 *
 * The interval is split into seconds and nanoseconds: nanosleep() rejects a
 * tv_nsec of one second or more, so passing usec * 1000 directly made every
 * sleep of >= 1s return immediately (and could overflow int). Zero and
 * negative values return at once.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000;

	/* on EINTR, nanosleep stores the time still to be slept in rem */
	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}
364
5c24b2c4 365static void rate_throttle(struct thread_data *td, unsigned long time_spent)
86184d14 366{
4240cfa1
JA
367 if (!td->rate)
368 return;
369
86184d14
JA
370 if (time_spent < td->rate_usec_cycle) {
371 unsigned long s = td->rate_usec_cycle - time_spent;
372
373 td->rate_pending_usleep += s;
fad86e6a 374 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
375 usec_sleep(td->rate_pending_usleep);
376 td->rate_pending_usleep = 0;
377 }
4240cfa1 378 } else {
42b2b9fe
JA
379 long overtime = time_spent - td->rate_usec_cycle;
380
4240cfa1
JA
381 td->rate_pending_usleep -= overtime;
382 }
383}
384
5c24b2c4 385static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 386{
7607bc6b 387 unsigned long spent;
4240cfa1
JA
388 unsigned long rate;
389
390 /*
391 * allow a 2 second settle period in the beginning
392 */
7607bc6b 393 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
394 return 0;
395
396 /*
397 * if rate blocks is set, sample is running
398 */
399 if (td->rate_blocks) {
400 spent = mtime_since(&td->lastrate, now);
401 if (spent < td->ratecycle)
402 return 0;
403
404 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
405 if (rate < td->ratemin) {
406 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 407 if (rate_quit)
e6402082 408 sig_handler(0);
4240cfa1
JA
409 return 1;
410 }
86184d14 411 }
4240cfa1
JA
412
413 td->rate_blocks = td->io_blocks;
414 memcpy(&td->lastrate, now, sizeof(*now));
415 return 0;
86184d14
JA
416}
417
67903a2e
JA
418static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
419{
420 if (mtime_since(&td->start, t) >= td->timeout * 1000)
421 return 1;
422
423 return 0;
424}
425
43000118 426static void do_sync_io(struct thread_data *td)
892199bd 427{
4240cfa1 428 struct timeval s, e;
86184d14 429 unsigned long blocks, msec, usec;
892199bd 430
892199bd
JA
431 for (blocks = 0; blocks < td->blocks; blocks++) {
432 off_t offset = get_next_offset(td);
433 int ret;
434
435 if (td->terminate)
436 break;
437
438 if (lseek(td->fd, offset, SEEK_SET) == -1) {
439 td->error = errno;
440 break;
441 }
442
443 if (td->delay_sleep)
86184d14 444 usec_sleep(td->delay_sleep);
892199bd
JA
445
446 gettimeofday(&s, NULL);
447
02983297 448 if (td_read(td))
7292613b 449 ret = read(td->fd, td->buf, td->bs);
892199bd 450 else
7292613b 451 ret = write(td->fd, td->buf, td->bs);
892199bd 452
f737299d 453 if (ret < (int) td->bs) {
892199bd
JA
454 if (ret == -1)
455 td->error = errno;
456 break;
457 }
458
4240cfa1
JA
459 td->io_blocks++;
460
461 if (should_fsync(td) && td->fsync_blocks &&
462 (td->io_blocks % td->fsync_blocks) == 0)
463 fsync(td->fd);
464
86184d14
JA
465 gettimeofday(&e, NULL);
466
467 usec = utime_since(&s, &e);
86184d14 468
4240cfa1 469 rate_throttle(td, usec);
892199bd 470
4240cfa1
JA
471 if (check_min_rate(td, &e)) {
472 td->error = ENODATA;
473 break;
474 }
892199bd 475
4240cfa1
JA
476 msec = usec / 1000;
477 add_stat_sample(td, msec);
892199bd
JA
478
479 if (msec < td->min_latency)
480 td->min_latency = msec;
481 if (msec > td->max_latency)
482 td->max_latency = msec;
67903a2e
JA
483
484 if (runtime_exceeded(td, &e))
485 break;
892199bd
JA
486 }
487
4240cfa1 488 if (should_fsync(td))
892199bd 489 fsync(td->fd);
892199bd 490}
43000118
JA
491
492static void aio_put_iocb(struct thread_data *td, struct iocb *iocb)
493{
494 long offset = ((long) iocb - (long) td->aio_iocbs)/ sizeof(struct iocb);
495
496 td->aio_iocbs_status[offset] = 0;
56b0eff0 497 td->aio_cur_depth--;
43000118
JA
498}
499
7292613b 500static struct iocb *aio_get_iocb(struct thread_data *td, struct timeval *t)
43000118
JA
501{
502 struct iocb *iocb = NULL;
56b0eff0 503 unsigned int i;
43000118
JA
504
505 for (i = 0; i < td->aio_depth; i++) {
506 if (td->aio_iocbs_status[i] == 0) {
507 td->aio_iocbs_status[i] = 1;
508 iocb = &td->aio_iocbs[i];
509 break;
510 }
511 }
512
513 if (iocb) {
514 off_t off = get_next_offset(td);
7292613b 515 char *p = td->buf + i * td->bs;
43000118 516
02983297 517 if (td_read(td))
43000118
JA
518 io_prep_pread(iocb, td->fd, p, td->bs, off);
519 else
520 io_prep_pwrite(iocb, td->fd, p, td->bs, off);
98168d55
JA
521
522 io_set_callback(iocb, (io_callback_t) msec_now(t));
43000118
JA
523 }
524
525 return iocb;
526}
527
56b0eff0
JA
528static int aio_submit(struct thread_data *td, struct iocb *iocb)
529{
530 int ret;
531
532 do {
533 ret = io_submit(*td->aio_ctx, 1, &iocb);
534 if (ret == 1)
535 return 0;
536
537 if (errno == EINTR)
538 continue;
539 else if (errno == EAGAIN)
540 usleep(100);
541 else
542 break;
543 } while (1);
544
545 return 1;
546}
547
98168d55
JA
548#define iocb_time(iocb) ((unsigned long) (iocb)->data)
549
43000118
JA
550static void do_async_io(struct thread_data *td)
551{
552 struct timeval s, e;
43000118 553 unsigned long blocks, msec, usec;
43000118 554
43000118
JA
555 for (blocks = 0; blocks < td->blocks; blocks++) {
556 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
557 struct timespec *timeout;
43000118 558 int ret, i, min_evts = 0;
8baf1bcc 559 struct iocb *iocb;
43000118
JA
560
561 if (td->terminate)
562 break;
563
564 if (td->delay_sleep)
565 usec_sleep(td->delay_sleep);
566
567 gettimeofday(&s, NULL);
568
7292613b 569 iocb = aio_get_iocb(td, &s);
8baf1bcc 570
56b0eff0
JA
571 ret = aio_submit(td, iocb);
572 if (ret) {
43000118
JA
573 td->error = errno;
574 break;
575 }
576
577 td->aio_cur_depth++;
43000118
JA
578
579 if (td->aio_cur_depth < td->aio_depth) {
580 timeout = &ts;
581 min_evts = 0;
582 } else {
583 timeout = NULL;
584 min_evts = 1;
585 }
586
587 ret = io_getevents(*td->aio_ctx, min_evts, td->aio_cur_depth, td->aio_events, timeout);
588 if (ret < 0) {
589 td->error = errno;
590 break;
591 } else if (!ret)
592 continue;
593
98168d55
JA
594 gettimeofday(&e, NULL);
595
43000118
JA
596 for (i = 0; i < ret; i++) {
597 struct io_event *ev = td->aio_events + i;
598
599 td->io_blocks++;
43000118
JA
600
601 iocb = ev->obj;
98168d55
JA
602
603 msec = msec_now(&e) - iocb_time(iocb);
604 add_stat_sample(td, msec);
605
606 if (msec < td->min_latency)
607 td->min_latency = msec;
608 if (msec > td->max_latency)
609 td->max_latency = msec;
610
43000118
JA
611 aio_put_iocb(td, iocb);
612 }
613
98168d55
JA
614 /*
615 * the rate is batched for now, it should work for batches
616 * of completions except the very first one which may look
617 * a little bursty
618 */
43000118
JA
619 usec = utime_since(&s, &e);
620
621 rate_throttle(td, usec);
622
623 if (check_min_rate(td, &e)) {
624 td->error = ENODATA;
625 break;
626 }
67903a2e
JA
627
628 if (runtime_exceeded(td, &e))
629 break;
43000118 630 }
43000118
JA
631}
632
56b0eff0 633static void cleanup_pending_aio(struct thread_data *td)
43000118 634{
56b0eff0
JA
635 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
636 unsigned int i;
637 int r;
638
43000118 639 /*
56b0eff0 640 * get immediately available events, if any
43000118 641 */
56b0eff0
JA
642 r = io_getevents(*td->aio_ctx, 0, td->aio_cur_depth, td->aio_events, &ts);
643 if (r > 0) {
644 for (i = 0; i < r; i++)
645 aio_put_iocb(td, &td->aio_iocbs[i]);
646 }
647
648 /*
649 * now cancel remaining active events
650 */
651 for (i = 0; i < td->aio_depth; i++) {
652 if (td->aio_iocbs_status[i] == 0)
653 continue;
654
655 r = io_cancel(*td->aio_ctx, &td->aio_iocbs[i], td->aio_events);
656 if (!r)
657 aio_put_iocb(td, &td->aio_iocbs[i]);
658 }
659
43000118
JA
660 if (td->aio_cur_depth)
661 io_getevents(*td->aio_ctx, td->aio_cur_depth, td->aio_cur_depth, td->aio_events, NULL);
56b0eff0
JA
662}
663
664static void cleanup_aio(struct thread_data *td)
665{
666 if (td->aio_cur_depth)
667 cleanup_pending_aio(td);
43000118
JA
668
669 if (td->aio_ctx) {
670 io_destroy(*td->aio_ctx);
671 free(td->aio_ctx);
672 }
673 if (td->aio_iocbs)
674 free(td->aio_iocbs);
675 if (td->aio_events)
676 free(td->aio_events);
677 if (td->aio_iocbs_status)
678 free(td->aio_iocbs_status);
679}
680
681static int init_aio(struct thread_data *td)
682{
683 td->aio_ctx = malloc(sizeof(*td->aio_ctx));
684
685 if (io_queue_init(td->aio_depth, td->aio_ctx)) {
686 td->error = errno;
687 return 1;
688 }
689
690 td->aio_iocbs = malloc(td->aio_depth * sizeof(struct iocb));
691 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
692 td->aio_iocbs_status = malloc(td->aio_depth * sizeof(char));
693 return 0;
694}
695
02983297
JA
696static int create_file(struct thread_data *td)
697{
698 unsigned int i;
699 char *b;
700
701 if (!td->file_size) {
702 fprintf(stderr, "Need size for create\n");
703 td->error = EINVAL;
704 return 1;
705 }
706
707 /*
708 * unless specifically asked for overwrite, let normal io extend it
709 */
710 if (!td_read(td) && !td->overwrite)
711 return 0;
712
713 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
714 if (td->fd < 0) {
715 td->error = errno;
716 return 1;
717 }
718
719 td->blocks = td->file_size / td->bs;
720 b = malloc(td->bs);
721 memset(b, 0, td->bs);
722
723 for (i = 0; i < td->blocks; i++) {
724 int r = write(td->fd, b, td->bs);
725
726 if (r == td->bs)
727 continue;
728 else {
729 if (r < 0)
730 td->error = errno;
731 else
732 td->error = EIO;
733
734 break;
735 }
736 }
737
738 fsync(td->fd);
739 close(td->fd);
740 td->fd = -1;
741 free(b);
742 return 0;
743}
744
745static int file_exists(struct thread_data *td)
746{
747 struct stat st;
748
749 if (stat(td->file_name, &st) != -1)
750 return 1;
751
752 return errno != ENOENT;
753}
754
755static int setup_file(struct thread_data *td)
756{
757 struct stat st;
758 int flags = 0;
759
760 if (!file_exists(td)) {
761 if (!td->create_file) {
762 td->error = ENOENT;
763 return 1;
764 }
765 if (create_file(td))
766 return 1;
767 }
768
769 if (td->odirect)
770 flags |= O_DIRECT;
771
772 if (td_read(td))
773 td->fd = open(td->file_name, flags | O_RDONLY);
774 else {
775 if (!td->overwrite)
776 flags |= O_TRUNC;
777
778 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT, 0600);
779 }
780
781 if (td->fd == -1) {
782 td->error = errno;
783 return 1;
784 }
785
786 if (td_read(td)) {
787 if (fstat(td->fd, &st) == -1) {
788 td->error = errno;
789 return 1;
790 }
791
792 if (td->file_size > st.st_size)
793 st.st_size = td->file_size;
794 } else {
795 if (!td->file_size)
796 td->file_size = 1024 * 1024 * 1024;
797
798 st.st_size = td->file_size;
799 }
800
801 td->blocks = (st.st_size - td->file_offset) / td->bs;
802 if (!td->blocks) {
803 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
804 td->error = EINVAL;
805 return 1;
806 }
807
b95799ca
JA
808 if (td->invalidate_cache) {
809 if (fadvise(td->fd, 0, st.st_size, POSIX_FADV_DONTNEED) < 0) {
810 td->error = errno;
811 return 1;
812 }
813 }
814
02983297
JA
815 return 0;
816}
817
5c24b2c4 818static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
819{
820 struct thread_data *td;
7292613b 821 void *data, *ptr = NULL;
02983297 822 int ret = 1;
892199bd 823
7292613b
JA
824 setsid();
825
892199bd
JA
826 data = shmat(shm_id, NULL, 0);
827 td = data + offset * sizeof(struct thread_data);
828 td->pid = getpid();
829
18e0b78c
JA
830 td->fd = -1;
831
832 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
833 td->error = errno;
834 goto err;
835 }
836
a642279f 837 printf("Client%d (pid=%u) started\n", td->thread_number, td->pid);
892199bd 838
4240cfa1 839 sprintf(argv[0], "fio%d", offset);
892199bd 840
43000118
JA
841 if (td->use_aio && init_aio(td))
842 goto err;
843
892199bd 844 if (init_random_state(td))
599002b3 845 goto err;
892199bd 846
f737299d 847 if (td->ioprio) {
892199bd
JA
848 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
849 td->error = errno;
599002b3 850 goto err;
892199bd
JA
851 }
852 }
853
02983297
JA
854 if (setup_file(td))
855 goto err;
856
892199bd
JA
857 sem_post(&startup_sem);
858 sem_wait(&td->mutex);
43000118 859
7292613b
JA
860 gettimeofday(&td->start, NULL);
861
862 if (td->ratemin)
863 memcpy(&td->lastrate, &td->start, sizeof(td->start));
864
fd1ae4c9
JA
865 memcpy(&td->stat_sample_time, &td->start, sizeof(td->start));
866
7292613b
JA
867 if (!td->use_aio) {
868 ptr = malloc(td->bs + MASK);
869 td->buf = ALIGN(ptr);
43000118 870 do_sync_io(td);
7292613b
JA
871 } else {
872 ptr = malloc(td->bs * td->aio_depth + MASK);
873 td->buf = ALIGN(ptr);
43000118 874 do_async_io(td);
7292613b
JA
875 }
876
be33abe4 877 td->runtime = mtime_since_now(&td->start);
892199bd 878 ret = 0;
892199bd 879err:
43000118
JA
880 if (td->use_aio)
881 cleanup_aio(td);
7292613b
JA
882 if (td->fd != -1) {
883 close(td->fd);
884 td->fd = -1;
885 }
599002b3 886 if (ret) {
892199bd 887 sem_post(&startup_sem);
599002b3
JA
888 sem_wait(&td->mutex);
889 }
7292613b
JA
890 if (ptr)
891 free(ptr);
02bdd9ba 892 td->runstate = TD_EXITED;
4240cfa1 893 shmdt(data);
892199bd
JA
894 return NULL;
895}
896
5c24b2c4 897static void free_shm(void)
892199bd
JA
898{
899 shmdt(threads);
900}
901
5c24b2c4 902static void show_thread_status(struct thread_data *td)
892199bd
JA
903{
904 int prio, prio_class;
905 unsigned long bw = 0;
fd1ae4c9 906 double n_lat, n_bw, m_lat, m_bw, dev_lat, dev_bw;
892199bd 907
213b446c
JA
908 if (!td->io_blocks && !td->error)
909 return;
910
892199bd 911 if (td->runtime)
4240cfa1 912 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
913
914 prio = td->ioprio & 0xff;
915 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
916
fd1ae4c9
JA
917 n_lat = (double) td->stat_time_samples;
918 n_bw = (double) td->stat_bw_samples;
919
920 m_lat = (double) td->stat_time / n_lat;
921 dev_lat = sqrt(((double) td->stat_time_sq - (m_lat * m_lat) / n_lat) / (n_lat - 1));
922 m_bw = (double) td->stat_bw / n_bw;
923 dev_bw = sqrt(((double) td->stat_bw_sq - (m_bw * m_bw) / n_bw) / (n_bw - 1));
e128065d 924
a642279f 925 printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/sec, latmax=%5lumsec, latavg=%5.02fmsec, latdev=%5.02fmsec, bwavg=%5.02fKiB/sec, bwdev=%5.02fKiB/sec\n", td->thread_number, td->error, td->io_blocks * td->bs >> 20, bw, td->max_latency, m_lat, dev_lat, m_bw, dev_bw);
892199bd
JA
926}
927
5c24b2c4 928static int setup_rate(struct thread_data *td)
86184d14 929{
4240cfa1
JA
930 int nr_reads_per_sec;
931
932 if (!td->rate)
933 return 0;
934
935 if (td->rate < td->ratemin) {
936 fprintf(stderr, "min rate larger than nominal rate\n");
937 return -1;
938 }
86184d14 939
4240cfa1 940 nr_reads_per_sec = td->rate * 1024 / td->bs;
86184d14
JA
941 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
942 td->rate_pending_usleep = 0;
4240cfa1 943 return 0;
86184d14
JA
944}
945
47d45203 946static struct thread_data *get_new_job(int global)
892199bd 947{
4240cfa1
JA
948 struct thread_data *td;
949
47d45203
JA
950 if (global)
951 return &def_thread;
8867c0a8 952 if (thread_number >= max_jobs)
4240cfa1
JA
953 return NULL;
954
955 td = &threads[thread_number++];
fc24389f 956 memset(td, 0, sizeof(*td));
892199bd 957
86184d14 958 td->thread_number = thread_number;
47d45203
JA
959 td->ddir = def_thread.ddir;
960 td->bs = def_thread.bs;
961 td->odirect = def_thread.odirect;
962 td->ratecycle = def_thread.ratecycle;
963 td->sequential = def_thread.sequential;
67903a2e 964 td->timeout = def_thread.timeout;
02983297
JA
965 td->create_file = def_thread.create_file;
966 td->overwrite = def_thread.overwrite;
b95799ca 967 td->invalidate_cache = def_thread.invalidate_cache;
47d45203 968 memcpy(&td->cpumask, &def_thread.cpumask, sizeof(td->cpumask));
f737299d
JA
969
970 return td;
971}
972
4240cfa1
JA
973static void put_job(struct thread_data *td)
974{
975 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
976 thread_number--;
977}
978
5c24b2c4
JA
979static int add_job(struct thread_data *td, const char *filename, int prioclass,
980 int prio)
f737299d 981{
47d45203
JA
982 if (td == &def_thread)
983 return 0;
984
f737299d 985 strcpy(td->file_name, filename);
4240cfa1 986 sem_init(&td->mutex, 1, 0);
892199bd 987 td->min_latency = 10000000;
f737299d
JA
988 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
989
43000118
JA
990 if (td->use_aio && !td->aio_depth)
991 td->aio_depth = 1;
992
4240cfa1
JA
993 if (setup_rate(td))
994 return -1;
f737299d 995
e128065d 996 printf("Client%d: file=%s, rw=%d, prio=%d/%d, seq=%d, odir=%d, bs=%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, filename, td->ddir, prioclass, prio, td->sequential, td->odirect, td->bs, td->rate, td->use_aio, td->aio_depth);
4240cfa1 997 return 0;
892199bd
JA
998}
999
18e0b78c
JA
/*
 * Turn the bit mask 'cpu' into a cpu_set_t: bit N set means CPU N is
 * allowed. Works on the caller's set through a pointer.
 */
static void fill_cpu_mask_ptr(cpu_set_t *cpumask, int cpu)
{
	unsigned int bits = (unsigned int) cpu;
	unsigned int i;

	CPU_ZERO(cpumask);

	for (i = 0; i < sizeof(int) * 8; i++) {
		/* unsigned shift: 1 << 31 is not valid on a signed int */
		if ((1U << i) & bits)
			CPU_SET(i, cpumask);
	}
}

/*
 * fill_cpu_mask(mask, cpu) used to be a function taking the set by value,
 * so it only ever changed a private copy and the caller's mask was left
 * untouched. The macro keeps the call syntax and passes the address of the
 * caller's set instead; 'mask' must be an lvalue.
 */
#define fill_cpu_mask(mask, cpu)	fill_cpu_mask_ptr(&(mask), (cpu))
1011
/*
 * Size of the option buffers handed to fill_option(); the callers in this
 * file allocate 256 bytes for them.
 */
enum { FILL_OPTION_MAX = 256 };

/*
 * Copy one option value from 'input' to 'output'. The value ends at the
 * first ',' or '}' or at the end of the string. At most
 * FILL_OPTION_MAX - 1 characters are copied so an over-long value is
 * truncated instead of overrunning the buffer; 'output' is always
 * NUL-terminated.
 */
static void fill_option(const char *input, char *output)
{
	int i = 0;

	while (i < FILL_OPTION_MAX - 1 &&
	       input[i] != ',' && input[i] != '}' && input[i] != '\0') {
		output[i] = input[i];
		i++;
	}

	output[i] = '\0';
}
1024
02983297
JA
/*
 * convert string after '=' into decimal value, noting any size suffix
 *
 * The suffix (k/K, m/M, g/G, powers of 1024) is the character that follows
 * the digits, optionally separated from them by blanks, so it is found
 * whether or not the string ends in a newline. Returns 0 and stores the
 * scaled value in *val on success. Returns 1, leaving *val untouched, when
 * there is no '=', no digits, or the number is out of range.
 */
static int str_cnv(char *p, unsigned long long *val)
{
	unsigned long long num, mult = 1;
	char *str, *end;

	str = strchr(p, '=');
	if (!str)
		return 1;

	str++;

	/* errno must be clear to tell a real overflow from a stale value */
	errno = 0;
	num = strtoull(str, &end, 10);
	if (end == str)
		return 1;
	if (num == ULLONG_MAX && errno == ERANGE)
		return 1;

	while (*end == ' ' || *end == '\t')
		end++;

	switch (*end) {
	case 'k':
	case 'K':
		mult = 1024ULL;
		break;
	case 'm':
	case 'M':
		mult = 1024ULL * 1024;
		break;
	case 'g':
	case 'G':
		mult = 1024ULL * 1024 * 1024;
		break;
	default:
		break;
	}

	*val = num * mult;
	return 0;
}
1065
892199bd
JA
1066/*
1067 * job key words:
1068 *
1069 * file=
1070 * bs=
1071 * rw=
1072 * direct=
1073 */
5c24b2c4 1074static void parse_jobs_cmd(int argc, char *argv[], int index)
892199bd 1075{
f737299d
JA
1076 struct thread_data *td;
1077 unsigned int prio, prioclass, cpu;
892199bd
JA
1078 char *string, *filename, *p, *c;
1079 int i;
1080
1081 string = malloc(256);
1082 filename = malloc(256);
1083
1084 for (i = index; i < argc; i++) {
1085 p = argv[i];
1086
1087 c = strpbrk(p, "{");
1088 if (!c)
1089 break;
1090
1091 filename[0] = 0;
4240cfa1 1092
47d45203 1093 td = get_new_job(0);
4240cfa1
JA
1094 if (!td)
1095 break;
f737299d 1096
892199bd 1097 prioclass = 2;
f737299d 1098 prio = 4;
892199bd
JA
1099
1100 c = strstr(p, "rw=");
1101 if (c) {
1102 c += 3;
1103 if (*c == '0')
f737299d 1104 td->ddir = DDIR_READ;
892199bd 1105 else
f737299d 1106 td->ddir = DDIR_WRITE;
892199bd
JA
1107 }
1108
1109 c = strstr(p, "prio=");
1110 if (c) {
1111 c += 5;
1112 prio = *c - '0';
1113 }
1114
1115 c = strstr(p, "prioclass=");
1116 if (c) {
1117 c += 10;
1118 prioclass = *c - '0';
1119 }
1120
1121 c = strstr(p, "file=");
1122 if (c) {
1123 c += 5;
1124 fill_option(c, filename);
1125 }
1126
1127 c = strstr(p, "bs=");
1128 if (c) {
1129 c += 3;
1130 fill_option(c, string);
f737299d
JA
1131 td->bs = strtoul(string, NULL, 10);
1132 td->bs <<= 10;
892199bd
JA
1133 }
1134
1135 c = strstr(p, "direct=");
1136 if (c) {
1137 c += 7;
1138 if (*c != '0')
f737299d 1139 td->odirect = 1;
892199bd 1140 else
f737299d 1141 td->odirect = 0;
892199bd
JA
1142 }
1143
1144 c = strstr(p, "delay=");
1145 if (c) {
1146 c += 6;
1147 fill_option(c, string);
f737299d 1148 td->delay_sleep = strtoul(string, NULL, 10);
892199bd
JA
1149 }
1150
86184d14
JA
1151 c = strstr(p, "rate=");
1152 if (c) {
1153 c += 5;
1154 fill_option(c, string);
f737299d 1155 td->rate = strtoul(string, NULL, 10);
86184d14
JA
1156 }
1157
4240cfa1
JA
1158 c = strstr(p, "ratemin=");
1159 if (c) {
1160 c += 8;
1161 fill_option(c, string);
1162 td->ratemin = strtoul(string, NULL, 10);
1163 }
1164
1165 c = strstr(p, "ratecycle=");
1166 if (c) {
1167 c += 10;
1168 fill_option(c, string);
1169 td->ratecycle = strtoul(string, NULL, 10);
1170 }
1171
18e0b78c
JA
1172 c = strstr(p, "cpumask=");
1173 if (c) {
1174 c += 8;
1175 fill_option(c, string);
1176 cpu = strtoul(string, NULL, 10);
f737299d 1177 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1178 }
1179
4240cfa1
JA
1180 c = strstr(p, "fsync=");
1181 if (c) {
1182 c += 6;
1183 fill_option(c, string);
1184 td->fsync_blocks = strtoul(string, NULL, 10);
1185 }
18e0b78c 1186
fc24389f
JA
1187 c = strstr(p, "startdelay=");
1188 if (c) {
1189 c += 11;
1190 fill_option(c, string);
1191 td->start_delay = strtoul(string, NULL, 10);
1192 }
1193
67903a2e
JA
1194 c = strstr(p, "timeout=");
1195 if (c) {
1196 c += 8;
1197 fill_option(c, string);
1198 td->timeout = strtoul(string, NULL, 10);
1199 }
1200
b95799ca
JA
1201 c = strstr(p, "invalidate=");
1202 if (c) {
1203 c += 11;
1204 if (*c != '0')
1205 td->invalidate_cache = 1;
1206 else
1207 td->invalidate_cache = 0;
1208 }
1209
02983297
JA
1210 c = strstr(p, "size=");
1211 if (c) {
1212 c += 5;
1213 str_cnv(c, &td->file_size);
1214 }
1215
1216 c = strstr(p, "offset=");
1217 if (c) {
1218 c += 7;
1219 str_cnv(c, &td->file_offset);
1220 }
1221
43000118
JA
1222 c = strstr(p, "aio_depth=");
1223 if (c) {
1224 c += 10;
1225 fill_option(c, string);
1226 td->aio_depth = strtoul(string, NULL, 10);
1227 }
1228
1229 c = strstr(p, "aio");
1230 if (c)
1231 td->use_aio = 1;
1232
02983297
JA
1233 c = strstr(p, "create");
1234 if (c)
1235 td->create_file = 1;
1236
1237 c = strstr(p, "overwrite");
1238 if (c)
1239 td->overwrite = 1;
1240
892199bd
JA
1241 c = strstr(p, "random");
1242 if (c)
f737299d 1243 td->sequential = 0;
892199bd
JA
1244 c = strstr(p, "sequential");
1245 if (c)
f737299d 1246 td->sequential = 1;
892199bd 1247
4240cfa1
JA
1248 if (add_job(td, filename, prioclass, prio))
1249 put_job(td);
892199bd
JA
1250 }
1251
7dd1389e
JA
1252 free(string);
1253 free(filename);
892199bd
JA
1254}
1255
02983297
JA
/*
 * If 'name' occurs anywhere in line 'p', convert the value after the '='
 * with str_cnv() and return its result; otherwise return 1.
 */
static int check_strcnv(char *p, char *name, unsigned long long *val)
{
	return strstr(p, name) ? str_cnv(p, val) : 1;
}
1263
/*
 * Try to read "<name>=<number>" from the start of line 'p'; blanks around
 * the '=' are allowed.
 *
 * A single scanf format does the job of the former two: white space in a
 * scanf format matches any amount of input white space, including none, so
 * "name = %u" accepts both "name=1" and "name = 1". %u matches the unsigned
 * destination, and the format is built with a bounded snprintf.
 *
 * Returns 0 and stores the number in *val on a match, 1 otherwise.
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char fmt[128];
	int len;

	len = snprintf(fmt, sizeof(fmt), "%s = %%u", name);
	if (len < 0 || (size_t) len >= sizeof(fmt))
		return 1;

	if (sscanf(p, fmt, val) == 1)
		return 0;

	return 1;
}
1278
/*
 * Returns 1 if 'line' holds nothing but white space / control characters,
 * or if a ';' comment starts before any other character; 0 otherwise.
 *
 * Characters are examined as unsigned char, as the <ctype.h> functions
 * require, and the string is walked once instead of calling strlen() on
 * every iteration.
 */
static int is_empty_or_comment(char *line)
{
	const unsigned char *c;

	for (c = (const unsigned char *) line; *c; c++) {
		if (*c == ';')
			return 1;
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
1292
5c24b2c4 1293static int parse_jobs_ini(char *file)
7dd1389e 1294{
47d45203 1295 unsigned int prioclass, prio, cpu, global;
f737299d 1296 struct thread_data *td;
7dd1389e
JA
1297 char *string, *name;
1298 fpos_t off;
1299 FILE *f;
1300 char *p;
1301
1302 f = fopen(file, "r");
1303 if (!f) {
1304 perror("fopen");
4240cfa1 1305 return 1;
7dd1389e
JA
1306 }
1307
1308 string = malloc(4096);
1309 name = malloc(256);
1310
7dd1389e 1311 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b
JA
1312 if (is_empty_or_comment(p))
1313 continue;
7dd1389e
JA
1314 if (sscanf(p, "[%s]", name) != 1)
1315 continue;
1316
47d45203
JA
1317 global = !strncmp(name, "global", 6);
1318
7dd1389e
JA
1319 name[strlen(name) - 1] = '\0';
1320
47d45203 1321 td = get_new_job(global);
4240cfa1
JA
1322 if (!td)
1323 break;
f737299d 1324
7dd1389e 1325 prioclass = 2;
f737299d 1326 prio = 4;
7dd1389e
JA
1327
1328 fgetpos(f, &off);
1329 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b 1330 if (is_empty_or_comment(p))
e6402082
JA
1331 continue;
1332 if (strstr(p, "["))
7dd1389e 1333 break;
f737299d
JA
1334 if (!check_int(p, "bs", &td->bs)) {
1335 td->bs <<= 10;
7dd1389e
JA
1336 fgetpos(f, &off);
1337 continue;
1338 }
f737299d 1339 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
1340 fgetpos(f, &off);
1341 continue;
1342 }
1343 if (!check_int(p, "prio", &prio)) {
1344 fgetpos(f, &off);
1345 continue;
1346 }
1347 if (!check_int(p, "prioclass", &prioclass)) {
1348 fgetpos(f, &off);
1349 continue;
1350 }
f737299d 1351 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
1352 fgetpos(f, &off);
1353 continue;
1354 }
f737299d 1355 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
1356 fgetpos(f, &off);
1357 continue;
1358 }
4240cfa1
JA
1359 if (!check_int(p, "ratemin", &td->ratemin)) {
1360 fgetpos(f, &off);
1361 continue;
1362 }
1363 if (!check_int(p, "ratecycle", &td->ratecycle)) {
1364 fgetpos(f, &off);
1365 continue;
1366 }
f737299d 1367 if (!check_int(p, "delay", &td->delay_sleep)) {
7dd1389e
JA
1368 fgetpos(f, &off);
1369 continue;
1370 }
18e0b78c 1371 if (!check_int(p, "cpumask", &cpu)) {
f737299d 1372 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1373 fgetpos(f, &off);
1374 continue;
1375 }
4240cfa1
JA
1376 if (!check_int(p, "fsync", &td->fsync_blocks)) {
1377 fgetpos(f, &off);
1378 continue;
1379 }
fc24389f
JA
1380 if (!check_int(p, "startdelay", &td->start_delay)) {
1381 fgetpos(f, &off);
1382 continue;
1383 }
67903a2e
JA
1384 if (!check_int(p, "timeout", &td->timeout)) {
1385 fgetpos(f, &off);
1386 continue;
1387 }
b95799ca
JA
1388 if (!check_int(p, "invalidate",&td->invalidate_cache)) {
1389 fgetpos(f, &off);
1390 continue;
1391 }
43000118
JA
1392 if (!check_int(p, "aio_depth", &td->aio_depth)) {
1393 fgetpos(f, &off);
1394 continue;
1395 }
02983297
JA
1396 if (!check_strcnv(p, "size", &td->file_size)) {
1397 fgetpos(f, &off);
1398 continue;
1399 }
1400 if (!check_strcnv(p, "offset", &td->file_offset)) {
1401 fgetpos(f, &off);
1402 continue;
1403 }
43000118 1404 if (!strncmp(p, "sequential", 10)) {
f737299d 1405 td->sequential = 1;
7dd1389e
JA
1406 fgetpos(f, &off);
1407 continue;
1408 }
43000118 1409 if (!strncmp(p, "random", 6)) {
f737299d 1410 td->sequential = 0;
7dd1389e
JA
1411 fgetpos(f, &off);
1412 continue;
1413 }
43000118
JA
1414 if (!strncmp(p, "aio", 3)) {
1415 td->use_aio = 1;
1416 fgetpos(f, &off);
1417 continue;
1418 }
02983297
JA
1419 if (!strncmp(p, "create", 6)) {
1420 td->create_file = 1;
1421 fgetpos(f, &off);
1422 continue;
1423 }
1424 if (!strncmp(p, "overwrite", 9)) {
1425 td->overwrite = 1;
1426 fgetpos(f, &off);
1427 continue;
1428 }
e6402082 1429 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
1430 }
1431 fsetpos(f, &off);
1432
4240cfa1
JA
1433 if (add_job(td, name, prioclass, prio))
1434 put_job(td);
7dd1389e
JA
1435 }
1436
1437 free(string);
1438 free(name);
fc7d63df 1439 fclose(f);
4240cfa1 1440 return 0;
7dd1389e
JA
1441}
1442
5c24b2c4 1443static int parse_options(int argc, char *argv[])
892199bd 1444{
01c4d8de 1445 int i;
892199bd
JA
1446
1447 for (i = 1; i < argc; i++) {
1448 char *parm = argv[i];
1449
1450 if (parm[0] != '-')
1451 break;
1452
1453 parm++;
1454 switch (*parm) {
1455 case 's':
1456 parm++;
47d45203 1457 def_thread.sequential = !!atoi(parm);
892199bd
JA
1458 break;
1459 case 'b':
1460 parm++;
47d45203
JA
1461 def_thread.bs = atoi(parm);
1462 def_thread.bs <<= 10;
1463 if (!def_thread.bs) {
4240cfa1 1464 printf("bad block size\n");
47d45203 1465 def_thread.bs = DEF_BS;
4240cfa1 1466 }
892199bd
JA
1467 break;
1468 case 't':
1469 parm++;
47d45203 1470 def_thread.timeout = atoi(parm);
892199bd 1471 break;
892199bd
JA
1472 case 'r':
1473 parm++;
1474 repeatable = !!atoi(parm);
1475 break;
02bdd9ba
JA
1476 case 'R':
1477 parm++;
1478 rate_quit = !!atoi(parm);
1479 break;
892199bd
JA
1480 case 'o':
1481 parm++;
47d45203 1482 def_thread.odirect = !!atoi(parm);
892199bd 1483 break;
7dd1389e
JA
1484 case 'f':
1485 if (i + 1 >= argc) {
1486 printf("-f needs file as arg\n");
1487 break;
1488 }
1489 ini_file = strdup(argv[i+1]);
a642279f 1490 i++;
7dd1389e 1491 break;
892199bd 1492 default:
7dd1389e 1493 printf("bad option %s\n", argv[i]);
892199bd
JA
1494 break;
1495 }
1496 }
1497
892199bd
JA
1498 return i;
1499}
1500
3f39453a
JA
1501static void print_thread_status(struct thread_data *td, int nr_running,
1502 int t_rate, int m_rate, int die)
1503{
1504 printf("Client%d: %s\n", td->thread_number, die ? "exited" : "spawned");
1505
1506 printf("Threads now running: %d", nr_running);
1507 if (m_rate || t_rate)
1508 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
1509 printf("\n");
1510}
1511
213b446c 1512static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1513{
213b446c 1514 int i;
02bdd9ba 1515
3f39453a
JA
1516 /*
1517 * reap exited threads (TD_EXITED -> TD_REAPED)
1518 */
02bdd9ba
JA
1519 for (i = 0; i < thread_number; i++) {
1520 struct thread_data *td = &threads[i];
1521
213b446c
JA
1522 if (td->runstate != TD_EXITED)
1523 continue;
02bdd9ba 1524
213b446c
JA
1525 td->runstate = TD_REAPED;
1526 waitpid(td->pid, NULL, 0);
1527 (*nr_running)--;
1528 (*m_rate) -= td->ratemin;
1529 (*t_rate) -= td->rate;
e6402082
JA
1530
1531 if (td->terminate)
1532 continue;
1533
3f39453a 1534 print_thread_status(td, *nr_running, *t_rate, *m_rate, 1);
213b446c 1535 }
02bdd9ba
JA
1536}
1537
fc24389f
JA
1538static void run_threads(char *argv[])
1539{
be33abe4 1540 struct timeval genesis;
fc24389f
JA
1541 struct thread_data *td;
1542 unsigned long spent;
213b446c 1543 int i, todo, nr_running, m_rate, t_rate;
fc24389f
JA
1544
1545 gettimeofday(&genesis, NULL);
1546
1547 printf("Starting %d threads\n", thread_number);
1548 fflush(stdout);
1549
7292613b
JA
1550 signal(SIGINT, sig_handler);
1551
fc24389f 1552 todo = thread_number;
02bdd9ba 1553 nr_running = 0;
213b446c 1554 m_rate = t_rate = 0;
fc24389f 1555
213b446c 1556 while (todo) {
3f39453a
JA
1557 /*
1558 * create threads (TD_NOT_CREATED -> TD_CREATED)
1559 */
fc24389f
JA
1560 for (i = 0; i < thread_number; i++) {
1561 td = &threads[i];
1562
02bdd9ba 1563 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1564 continue;
1565
213b446c
JA
1566 /*
1567 * never got a chance to start, killed by other
1568 * thread for some reason
1569 */
1570 if (td->terminate) {
1571 todo--;
1572 continue;
1573 }
1574
fc24389f 1575 if (td->start_delay) {
be33abe4 1576 spent = mtime_since_now(&genesis);
fc24389f
JA
1577
1578 if (td->start_delay * 1000 > spent)
1579 continue;
1580 }
1581
02bdd9ba 1582 td->runstate = TD_CREATED;
fc24389f
JA
1583 sem_init(&startup_sem, 1, 1);
1584 todo--;
1585
1586 if (fork())
1587 sem_wait(&startup_sem);
1588 else {
1589 thread_main(shm_id, i, argv);
1590 exit(0);
1591 }
1592 }
1593
3f39453a
JA
1594 /*
1595 * start created threads (TD_CREATED -> TD_STARTED)
1596 */
fc24389f
JA
1597 for (i = 0; i < thread_number; i++) {
1598 struct thread_data *td = &threads[i];
1599
3f39453a
JA
1600 if (td->runstate != TD_CREATED)
1601 continue;
1602
1603 td->runstate = TD_STARTED;
1604 nr_running++;
1605 m_rate += td->ratemin;
1606 t_rate += td->rate;
1607 sem_post(&td->mutex);
1608
1609 print_thread_status(td, nr_running, t_rate, m_rate, 0);
fc24389f
JA
1610 }
1611
213b446c 1612 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1613
fc24389f
JA
1614 if (todo)
1615 usleep(100000);
1616 }
02bdd9ba
JA
1617
1618 while (nr_running) {
213b446c 1619 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1620 usleep(10000);
1621 }
fc24389f
JA
1622}
1623
8867c0a8 1624int setup_thread_area(void)
892199bd 1625{
8867c0a8
JA
1626 /*
1627 * 1024 is too much on some machines, scale max_jobs if
1628 * we get a failure that looks like too large a shm segment
1629 */
1630 do {
1631 int s = max_jobs * sizeof(struct thread_data);
18e0b78c 1632
8867c0a8
JA
1633 shm_id = shmget(0, s, IPC_CREAT | 0600);
1634 if (shm_id != -1)
1635 break;
1636 if (errno != EINVAL) {
1637 perror("shmget");
1638 break;
1639 }
1640
1641 max_jobs >>= 1;
d4fac444 1642 } while (max_jobs);
8867c0a8
JA
1643
1644 if (shm_id == -1)
892199bd 1645 return 1;
892199bd
JA
1646
1647 threads = shmat(shm_id, NULL, 0);
8867c0a8 1648 if (threads == (void *) -1) {
86184d14
JA
1649 perror("shmat");
1650 return 1;
1651 }
892199bd
JA
1652
1653 atexit(free_shm);
8867c0a8
JA
1654 return 0;
1655}
1656
1657int main(int argc, char *argv[])
1658{
1659 static unsigned long max_run[2], min_run[2], total_blocks[2];
1660 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
1661 static unsigned long read_mb, write_mb, read_agg, write_agg;
1662 int i;
1663
1664 if (setup_thread_area())
1665 return 1;
892199bd 1666
47d45203 1667 if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &def_thread.cpumask) == -1) {
4240cfa1
JA
1668 perror("sched_getaffinity");
1669 return 1;
1670 }
1671
47d45203
JA
1672 /*
1673 * fill globals
1674 */
1675 def_thread.ddir = DDIR_READ;
1676 def_thread.bs = DEF_BS;
02983297 1677 def_thread.odirect = DEF_ODIRECT;
47d45203 1678 def_thread.ratecycle = DEF_RATE_CYCLE;
02983297 1679 def_thread.sequential = DEF_SEQUENTIAL;
47d45203 1680 def_thread.timeout = DEF_TIMEOUT;
02983297
JA
1681 def_thread.create_file = DEF_CREATE;
1682 def_thread.overwrite = DEF_OVERWRITE;
b95799ca 1683 def_thread.invalidate_cache = DEF_INVALIDATE;
47d45203 1684
892199bd 1685 i = parse_options(argc, argv);
7dd1389e 1686
4240cfa1
JA
1687 if (ini_file) {
1688 if (parse_jobs_ini(ini_file))
1689 return 1;
1690 } else
1691 parse_jobs_cmd(argc, argv, i);
7dd1389e 1692
4240cfa1
JA
1693 if (!thread_number) {
1694 printf("Nothing to do\n");
1695 return 1;
1696 }
7dd1389e 1697
fc24389f 1698 run_threads(argv);
892199bd 1699
892199bd
JA
1700 min_bw[0] = min_run[0] = ~0UL;
1701 min_bw[1] = min_run[1] = ~0UL;
1702 minl[0] = minl[1] = ~0UL;
1703 for (i = 0; i < thread_number; i++) {
1704 struct thread_data *td = &threads[i];
1705 unsigned long bw = 0;
1706
1707 if (td->error)
7dd1389e 1708 goto show_stat;
892199bd
JA
1709
1710 if (td->runtime < min_run[td->ddir])
1711 min_run[td->ddir] = td->runtime;
1712 if (td->runtime > max_run[td->ddir])
1713 max_run[td->ddir] = td->runtime;
1714
1715 if (td->runtime)
4240cfa1 1716 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1717 if (bw < min_bw[td->ddir])
1718 min_bw[td->ddir] = bw;
1719 if (bw > max_bw[td->ddir])
1720 max_bw[td->ddir] = bw;
1721 if (td->max_latency < minl[td->ddir])
1722 minl[td->ddir] = td->max_latency;
1723 if (td->max_latency > maxl[td->ddir])
1724 maxl[td->ddir] = td->max_latency;
1725
4240cfa1 1726 total_blocks[td->ddir] += td->io_blocks;
892199bd 1727
02983297 1728 if (td_read(td)) {
4240cfa1 1729 read_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1730 if (td->runtime)
4240cfa1 1731 read_agg += (td->io_blocks * td->bs) / td->runtime;
02983297 1732 } else {
4240cfa1 1733 write_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1734 if (td->runtime)
4240cfa1 1735 write_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1736 }
1737
7dd1389e 1738show_stat:
892199bd
JA
1739 show_thread_status(td);
1740 }
1741
1742 printf("Run status:\n");
1743 if (max_run[DDIR_READ])
1744 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
1745 if (max_run[DDIR_WRITE])
1746 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 1747
892199bd
JA
1748 return 0;
1749}