[PATCH] fio: async latency calculation fixes
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
892199bd
JA
32#include <sys/time.h>
33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/wait.h>
36#include <semaphore.h>
37#include <sys/ipc.h>
38#include <sys/shm.h>
39#include <asm/unistd.h>
40
4240cfa1
JA
/* upper bound on the number of jobs; also sizes the shared job table */
#define MAX_JOBS	(1024)
42
892199bd
JA
/*
 * Fallback syscall numbers for ioprio_set/ioprio_get, used only when the
 * system headers did not define __NR_ioprio_set. If _set is missing we
 * assume _get is missing as well.
 */
#if !defined(__NR_ioprio_set)

#if defined(__i386__)
#define __NR_ioprio_set		289
#define __NR_ioprio_get		290
#elif defined(__powerpc__) || defined(__powerpc64__)
#define __NR_ioprio_set		273
#define __NR_ioprio_get		274
#elif defined(__x86_64__)
#define __NR_ioprio_set		251
#define __NR_ioprio_get		252
#elif defined(__ia64__)
#define __NR_ioprio_set		1274
#define __NR_ioprio_get		1275
#elif defined(__alpha__)
#define __NR_ioprio_set		442
#define __NR_ioprio_get		443
#elif defined(__s390x__) || defined(__s390__)
#define __NR_ioprio_set		282
#define __NR_ioprio_get		283
#else
#error "Unsupported arch"
#endif

#endif /* !__NR_ioprio_set */
71
/*
 * Raw ioprio_set system call; the three arguments are handed to the
 * kernel unchanged and the result of syscall() is returned.
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return rc;
}
76
/* values for the "which" argument of ioprio_set() */
enum {
	IOPRIO_WHO_PROCESS	= 1,
	IOPRIO_WHO_PGRP		= 2,
	IOPRIO_WHO_USER		= 3,
};
82
/* the priority class sits above this bit position in an ioprio value */
#define IOPRIO_CLASS_SHIFT	13

/* low-bit mask used by ALIGN(): results are aligned to MASK + 1 bytes */
#define MASK	(4095)

/* built-in defaults for the run-wide settings */
#define DEF_BS		(4096)
#define DEF_TIMEOUT	(30)
#define DEF_RATE_CYCLE	(1000)
#define DEF_ODIRECT	(1)
#define DEF_SEQUENTIAL	(1)
#define DEF_WRITESTAT	(0)
#define DEF_RAND_REPEAT	(1)

/*
 * Round buf up to the next (MASK + 1)-byte boundary. The caller must have
 * allocated MASK extra bytes. The whole expansion is parenthesized so that
 * the result can be used in any expression, e.g. ALIGN(p)[0].
 */
#define ALIGN(buf)	\
	((char *) (((unsigned long) (buf) + MASK) & ~((unsigned long) MASK)))
892199bd 96
4240cfa1
JA
97static int sequential = DEF_SEQUENTIAL;
98static int write_stat = DEF_WRITESTAT;
99static int repeatable = DEF_RAND_REPEAT;
100static int timeout = DEF_TIMEOUT;
101static int odirect = DEF_ODIRECT;
102static int global_bs = DEF_BS;
02bdd9ba 103static int rate_quit = 1;
892199bd 104
892199bd 105static int thread_number;
7dd1389e 106static char *ini_file;
892199bd
JA
107
108static int shm_id;
109
18e0b78c
JA
110static cpu_set_t def_cpumask;
111
4240cfa1
JA
/* data direction of a job */
enum {
	DDIR_READ	= 0,
	DDIR_WRITE	= 1,
};
892199bd 116
02bdd9ba
JA
/*
 * Job life cycle, stored in thread_data.runstate. States are entered in
 * the order listed.
 */
enum {
	TD_NOT_CREATED	= 0,
	TD_CREATED	= 1,
	TD_STARTED	= 2,
	TD_EXITED	= 3,
	TD_REAPED	= 4,
};
127
892199bd
JA
128struct thread_data {
129 char file_name[256];
130 int thread_number;
131 int error;
132 int fd;
133 int stat_fd;
134 pid_t pid;
4240cfa1 135 volatile int terminate;
02bdd9ba 136 volatile int runstate;
f737299d
JA
137 unsigned int ddir;
138 unsigned int ioprio;
139 unsigned int sequential;
140 unsigned int bs;
141 unsigned int odirect;
142 unsigned int delay_sleep;
4240cfa1 143 unsigned int fsync_blocks;
fc24389f 144 unsigned int start_delay;
43000118 145 unsigned int use_aio;
18e0b78c 146 cpu_set_t cpumask;
86184d14 147
43000118
JA
148 io_context_t *aio_ctx;
149 struct iocb *aio_iocbs;
150 unsigned int aio_depth;
151 unsigned int aio_cur_depth;
152 struct io_event *aio_events;
153 char *aio_iocbs_status;
154
7dd1389e 155 unsigned int rate;
4240cfa1
JA
156 unsigned int ratemin;
157 unsigned int ratecycle;
158 unsigned long rate_usec_cycle;
159 long rate_pending_usleep;
160 unsigned long rate_blocks;
161 struct timeval lastrate;
86184d14 162
892199bd
JA
163 unsigned long max_latency; /* msec */
164 unsigned long min_latency; /* msec */
165 unsigned long runtime; /* sec */
166 unsigned long blocks;
4240cfa1 167 unsigned long io_blocks;
892199bd
JA
168 unsigned long last_block;
169 sem_t mutex;
892199bd
JA
170 struct drand48_data random_state;
171
172 /*
173 * bandwidth stat
174 */
175 unsigned long stat_time;
176 unsigned long stat_time_last;
177 unsigned long stat_blocks_last;
4240cfa1
JA
178
179 struct timeval start;
892199bd
JA
180};
181
182static struct thread_data *threads;
892199bd
JA
183
184static sem_t startup_sem;
185
5c24b2c4 186static void sig_handler(int sig)
892199bd
JA
187{
188 int i;
189
213b446c
JA
190 for (i = 0; i < thread_number; i++) {
191 struct thread_data *td = &threads[i];
192
193 td->terminate = 1;
194 td->start_delay = 0;
195 }
02bdd9ba
JA
196}
197
5c24b2c4 198static int init_random_state(struct thread_data *td)
892199bd
JA
199{
200 unsigned long seed = 123;
201
202 if (td->sequential)
203 return 0;
204
205 if (!repeatable) {
206 int fd = open("/dev/random", O_RDONLY);
207
208 if (fd == -1) {
209 td->error = errno;
210 return 1;
211 }
212
7dd1389e 213 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
892199bd
JA
214 td->error = EIO;
215 close(fd);
216 return 1;
217 }
218
219 close(fd);
220 }
221
222 srand48_r(seed, &td->random_state);
223 return 0;
224}
225
5c24b2c4 226static void shutdown_stat_file(struct thread_data *td)
892199bd
JA
227{
228 if (td->stat_fd != -1) {
229 fsync(td->stat_fd);
230 close(td->stat_fd);
231 }
232}
233
5c24b2c4 234static int init_stat_file(struct thread_data *td)
892199bd 235{
4240cfa1 236 char n[256];
892199bd
JA
237
238 if (!write_stat)
239 return 0;
240
892199bd
JA
241 sprintf(n, "%s.stat", td->file_name);
242 td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
243 if (td->stat_fd == -1) {
892199bd
JA
244 td->error = errno;
245 return 1;
246 }
247
892199bd
JA
248 return 0;
249}
250
/*
 * Microseconds elapsed from *s to *e.
 *
 * Returns 0 when *e lies before *s (e.g. the clock was stepped backwards),
 * rather than converting a negative value to unsigned.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - s->tv_usec;

	if (usec < 0)
		return 0;

	return usec;
}
266
/*
 * Milliseconds elapsed from *s to *e, truncated towards zero.
 *
 * Returns 0 when *e lies before *s (e.g. the clock was stepped backwards),
 * rather than converting a negative value to unsigned.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long usec;

	usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	usec += (long long) e->tv_usec - s->tv_usec;

	if (usec < 0)
		return 0;

	return usec / 1000;
}
283
98168d55
JA
/*
 * Convert a timeval into a millisecond count.
 */
static inline unsigned long msec_now(struct timeval *s)
{
	unsigned long from_sec = s->tv_sec * 1000;
	unsigned long from_usec = s->tv_usec / 1000;

	return from_sec + from_usec;
}
288
5c24b2c4 289static unsigned long get_next_offset(struct thread_data *td)
892199bd
JA
290{
291 unsigned long b;
292 long r;
293
294 if (!td->sequential) {
295 lrand48_r(&td->random_state, &r);
296 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
297 } else {
298 b = td->last_block;
299 td->last_block++;
300 }
301
302 return b * td->bs;
303}
304
5c24b2c4 305static void add_stat_sample(struct thread_data *td, unsigned long msec)
892199bd
JA
306{
307 char sample[256];
308
309 if (!td->stat_fd)
310 return;
311
312#if 0
4240cfa1 313 sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
892199bd
JA
314 write(td->stat_fd, sample, strlen(sample));
315#else
316 td->stat_time += msec;
317 td->stat_time_last += msec;
318 td->stat_blocks_last++;
319
320 if (td->stat_time_last >= 500) {
321 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
322
323 td->stat_time_last = 0;
324 td->stat_blocks_last = 0;
325 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
4240cfa1 326 //sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
892199bd
JA
327 write(td->stat_fd, sample, strlen(sample));
328 }
329#endif
330}
331
/*
 * Sleep for usec microseconds, resuming after signal interruptions until
 * the full time has passed. Zero or negative values return immediately.
 *
 * The time is split into seconds and nanoseconds: nanosleep() rejects a
 * tv_nsec of one second or more, so usec * 1000 cannot go there directly.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000;

	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}
346
5c24b2c4 347static void rate_throttle(struct thread_data *td, unsigned long time_spent)
86184d14 348{
4240cfa1
JA
349 if (!td->rate)
350 return;
351
86184d14
JA
352 if (time_spent < td->rate_usec_cycle) {
353 unsigned long s = td->rate_usec_cycle - time_spent;
354
355 td->rate_pending_usleep += s;
fad86e6a 356 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
357 usec_sleep(td->rate_pending_usleep);
358 td->rate_pending_usleep = 0;
359 }
4240cfa1 360 } else {
42b2b9fe
JA
361 long overtime = time_spent - td->rate_usec_cycle;
362
4240cfa1
JA
363 td->rate_pending_usleep -= overtime;
364 }
365}
366
5c24b2c4 367static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1
JA
368{
369 unsigned long spent = mtime_since(&td->start, now);
370 unsigned long rate;
371
372 /*
373 * allow a 2 second settle period in the beginning
374 */
375 if (spent < 2000)
376 return 0;
377
378 /*
379 * if rate blocks is set, sample is running
380 */
381 if (td->rate_blocks) {
382 spent = mtime_since(&td->lastrate, now);
383 if (spent < td->ratecycle)
384 return 0;
385
386 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
387 if (rate < td->ratemin) {
388 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 389 if (rate_quit)
e6402082 390 sig_handler(0);
4240cfa1
JA
391 return 1;
392 }
86184d14 393 }
4240cfa1
JA
394
395 td->rate_blocks = td->io_blocks;
396 memcpy(&td->lastrate, now, sizeof(*now));
397 return 0;
86184d14
JA
398}
399
4240cfa1
JA
/* only jobs that write without O_DIRECT are ever fsync'ed */
#define should_fsync(td)	(!(td)->odirect && (td)->ddir == DDIR_WRITE)
401
43000118 402static void do_sync_io(struct thread_data *td)
892199bd 403{
4240cfa1 404 struct timeval s, e;
892199bd 405 char *buffer, *ptr;
86184d14 406 unsigned long blocks, msec, usec;
892199bd 407
4240cfa1
JA
408 ptr = malloc(td->bs + MASK);
409 buffer = ALIGN(ptr);
892199bd 410
4240cfa1
JA
411 gettimeofday(&td->start, NULL);
412
413 if (td->ratemin)
414 memcpy(&td->lastrate, &td->start, sizeof(td->start));
892199bd
JA
415
416 for (blocks = 0; blocks < td->blocks; blocks++) {
417 off_t offset = get_next_offset(td);
418 int ret;
419
420 if (td->terminate)
421 break;
422
423 if (lseek(td->fd, offset, SEEK_SET) == -1) {
424 td->error = errno;
425 break;
426 }
427
428 if (td->delay_sleep)
86184d14 429 usec_sleep(td->delay_sleep);
892199bd
JA
430
431 gettimeofday(&s, NULL);
432
433 if (td->ddir == DDIR_READ)
434 ret = read(td->fd, buffer, td->bs);
435 else
436 ret = write(td->fd, buffer, td->bs);
437
f737299d 438 if (ret < (int) td->bs) {
892199bd
JA
439 if (ret == -1)
440 td->error = errno;
441 break;
442 }
443
4240cfa1
JA
444 td->io_blocks++;
445
446 if (should_fsync(td) && td->fsync_blocks &&
447 (td->io_blocks % td->fsync_blocks) == 0)
448 fsync(td->fd);
449
86184d14
JA
450 gettimeofday(&e, NULL);
451
452 usec = utime_since(&s, &e);
86184d14 453
4240cfa1 454 rate_throttle(td, usec);
892199bd 455
4240cfa1
JA
456 if (check_min_rate(td, &e)) {
457 td->error = ENODATA;
458 break;
459 }
892199bd 460
4240cfa1
JA
461 msec = usec / 1000;
462 add_stat_sample(td, msec);
892199bd
JA
463
464 if (msec < td->min_latency)
465 td->min_latency = msec;
466 if (msec > td->max_latency)
467 td->max_latency = msec;
468 }
469
4240cfa1 470 if (should_fsync(td))
892199bd
JA
471 fsync(td->fd);
472
473 gettimeofday(&e, NULL);
4240cfa1 474 td->runtime = mtime_since(&td->start, &e);
892199bd
JA
475
476 free(ptr);
477}
43000118
JA
478
479static void aio_put_iocb(struct thread_data *td, struct iocb *iocb)
480{
481 long offset = ((long) iocb - (long) td->aio_iocbs)/ sizeof(struct iocb);
482
483 td->aio_iocbs_status[offset] = 0;
484}
485
98168d55
JA
486static struct iocb *aio_get_iocb(struct thread_data *td, char *buffer,
487 struct timeval *t)
43000118
JA
488{
489 struct iocb *iocb = NULL;
490 int i;
491
492 for (i = 0; i < td->aio_depth; i++) {
493 if (td->aio_iocbs_status[i] == 0) {
494 td->aio_iocbs_status[i] = 1;
495 iocb = &td->aio_iocbs[i];
496 break;
497 }
498 }
499
500 if (iocb) {
501 off_t off = get_next_offset(td);
502 char *p = buffer + i * td->bs;
503
504 if (td->ddir == DDIR_READ)
505 io_prep_pread(iocb, td->fd, p, td->bs, off);
506 else
507 io_prep_pwrite(iocb, td->fd, p, td->bs, off);
98168d55
JA
508
509 io_set_callback(iocb, (io_callback_t) msec_now(t));
43000118
JA
510 }
511
512 return iocb;
513}
514
98168d55
JA
/* read an iocb's data field back as the msec timestamp aio_get_iocb() stored */
#define iocb_time(iocb)	((unsigned long) ((iocb)->data))
516
43000118
JA
517static void do_async_io(struct thread_data *td)
518{
519 struct timeval s, e;
520 char *buf, *ptr;
521 unsigned long blocks, msec, usec;
43000118
JA
522
523 ptr = malloc(td->bs * td->aio_depth + MASK);
524 buf = ALIGN(ptr);
525
526 gettimeofday(&td->start, NULL);
527
528 if (td->ratemin)
529 memcpy(&td->lastrate, &td->start, sizeof(td->start));
530
531 for (blocks = 0; blocks < td->blocks; blocks++) {
532 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
533 struct timespec *timeout;
43000118 534 int ret, i, min_evts = 0;
8baf1bcc 535 struct iocb *iocb;
43000118
JA
536
537 if (td->terminate)
538 break;
539
540 if (td->delay_sleep)
541 usec_sleep(td->delay_sleep);
542
543 gettimeofday(&s, NULL);
544
98168d55 545 iocb = aio_get_iocb(td, buf, &s);
8baf1bcc 546
43000118
JA
547 ret = io_submit(*td->aio_ctx, 1, &iocb);
548 if (ret < 0) {
549 td->error = errno;
550 break;
551 }
552
553 td->aio_cur_depth++;
43000118
JA
554
555 if (td->aio_cur_depth < td->aio_depth) {
556 timeout = &ts;
557 min_evts = 0;
558 } else {
559 timeout = NULL;
560 min_evts = 1;
561 }
562
563 ret = io_getevents(*td->aio_ctx, min_evts, td->aio_cur_depth, td->aio_events, timeout);
564 if (ret < 0) {
565 td->error = errno;
566 break;
567 } else if (!ret)
568 continue;
569
98168d55
JA
570 gettimeofday(&e, NULL);
571
43000118
JA
572 for (i = 0; i < ret; i++) {
573 struct io_event *ev = td->aio_events + i;
574
575 td->io_blocks++;
576 td->aio_cur_depth--;
577
578 iocb = ev->obj;
98168d55
JA
579
580 msec = msec_now(&e) - iocb_time(iocb);
581 add_stat_sample(td, msec);
582
583 if (msec < td->min_latency)
584 td->min_latency = msec;
585 if (msec > td->max_latency)
586 td->max_latency = msec;
587
43000118
JA
588 aio_put_iocb(td, iocb);
589 }
590
98168d55
JA
591 /*
592 * the rate is batched for now, it should work for batches
593 * of completions except the very first one which may look
594 * a little bursty
595 */
43000118
JA
596 usec = utime_since(&s, &e);
597
598 rate_throttle(td, usec);
599
600 if (check_min_rate(td, &e)) {
601 td->error = ENODATA;
602 break;
603 }
43000118
JA
604 }
605
606 gettimeofday(&e, NULL);
607 td->runtime = mtime_since(&td->start, &e);
608
609 free(ptr);
610}
611
612static void cleanup_aio(struct thread_data *td)
613{
614 /*
615 * flush pending events
616 */
617 if (td->aio_cur_depth)
618 io_getevents(*td->aio_ctx, td->aio_cur_depth, td->aio_cur_depth, td->aio_events, NULL);
619
620 if (td->aio_ctx) {
621 io_destroy(*td->aio_ctx);
622 free(td->aio_ctx);
623 }
624 if (td->aio_iocbs)
625 free(td->aio_iocbs);
626 if (td->aio_events)
627 free(td->aio_events);
628 if (td->aio_iocbs_status)
629 free(td->aio_iocbs_status);
630}
631
632static int init_aio(struct thread_data *td)
633{
634 td->aio_ctx = malloc(sizeof(*td->aio_ctx));
635
636 if (io_queue_init(td->aio_depth, td->aio_ctx)) {
637 td->error = errno;
638 return 1;
639 }
640
641 td->aio_iocbs = malloc(td->aio_depth * sizeof(struct iocb));
642 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
643 td->aio_iocbs_status = malloc(td->aio_depth * sizeof(char));
644 return 0;
645}
646
5c24b2c4 647static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
648{
649 struct thread_data *td;
650 void *data;
4240cfa1 651 struct stat st;
892199bd
JA
652 int ret = 1, flags;
653
654 data = shmat(shm_id, NULL, 0);
655 td = data + offset * sizeof(struct thread_data);
656 td->pid = getpid();
657
18e0b78c
JA
658 td->fd = -1;
659
660 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
661 td->error = errno;
662 goto err;
663 }
664
43000118 665 printf("Thread (%s) (pid=%u) (f=%s) (aio=%d) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name, td->use_aio);
892199bd
JA
666 fflush(stdout);
667
4240cfa1 668 sprintf(argv[0], "fio%d", offset);
892199bd 669
7dd1389e 670 flags = 0;
892199bd
JA
671 if (td->odirect)
672 flags |= O_DIRECT;
673
7dd1389e
JA
674 if (td->ddir == DDIR_READ)
675 td->fd = open(td->file_name, flags | O_RDONLY);
676 else
677 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT | O_TRUNC, 0644);
678
892199bd
JA
679 if (td->fd == -1) {
680 td->error = errno;
681 goto err;
682 }
683
43000118
JA
684 if (td->use_aio && init_aio(td))
685 goto err;
686
892199bd
JA
687 if (init_random_state(td))
688 goto out;
689 if (init_stat_file(td))
690 goto out;
691
692 if (td->ddir == DDIR_READ) {
4240cfa1 693 if (fstat(td->fd, &st) == -1) {
892199bd
JA
694 td->error = errno;
695 goto out;
696 }
697
4240cfa1 698 td->blocks = st.st_size / td->bs;
892199bd
JA
699 if (!td->blocks) {
700 td->error = EINVAL;
701 goto out;
702 }
703 } else
704 td->blocks = 1024 * 1024 * 1024 / td->bs;
705
f737299d 706 if (td->ioprio) {
892199bd
JA
707 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
708 td->error = errno;
709 goto out;
710 }
711 }
712
713 sem_post(&startup_sem);
714 sem_wait(&td->mutex);
43000118
JA
715
716 if (!td->use_aio)
717 do_sync_io(td);
718 else
719 do_async_io(td);
720
892199bd
JA
721 ret = 0;
722
723out:
892199bd
JA
724 shutdown_stat_file(td);
725err:
18e0b78c
JA
726 if (td->fd != -1)
727 close(td->fd);
43000118
JA
728 if (td->use_aio)
729 cleanup_aio(td);
892199bd
JA
730 if (ret)
731 sem_post(&startup_sem);
02bdd9ba
JA
732
733 td->runstate = TD_EXITED;
4240cfa1 734 shmdt(data);
892199bd
JA
735 return NULL;
736}
737
5c24b2c4 738static void free_shm(void)
892199bd
JA
739{
740 shmdt(threads);
741}
742
5c24b2c4 743static void show_thread_status(struct thread_data *td)
892199bd
JA
744{
745 int prio, prio_class;
746 unsigned long bw = 0;
747
213b446c
JA
748 if (!td->io_blocks && !td->error)
749 return;
750
892199bd 751 if (td->runtime)
4240cfa1 752 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
753
754 prio = td->ioprio & 0xff;
755 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
756
4240cfa1 757 printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->io_blocks * td->bs >> 20, bw);
892199bd
JA
758}
759
5c24b2c4 760static int setup_rate(struct thread_data *td)
86184d14 761{
4240cfa1
JA
762 int nr_reads_per_sec;
763
764 if (!td->rate)
765 return 0;
766
767 if (td->rate < td->ratemin) {
768 fprintf(stderr, "min rate larger than nominal rate\n");
769 return -1;
770 }
86184d14 771
4240cfa1 772 nr_reads_per_sec = td->rate * 1024 / td->bs;
86184d14
JA
773 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
774 td->rate_pending_usleep = 0;
4240cfa1 775 return 0;
86184d14
JA
776}
777
5c24b2c4 778static struct thread_data *get_new_job(void)
892199bd 779{
4240cfa1
JA
780 struct thread_data *td;
781
782 if (thread_number >= MAX_JOBS)
783 return NULL;
784
785 td = &threads[thread_number++];
fc24389f 786 memset(td, 0, sizeof(*td));
892199bd 787
86184d14 788 td->thread_number = thread_number;
f737299d
JA
789 td->ddir = DDIR_READ;
790 td->bs = global_bs;
791 td->odirect = 1;
792 td->delay_sleep = 0;
793 td->rate = 0;
4240cfa1 794 td->ratecycle = DEF_RATE_CYCLE;
f737299d
JA
795 td->sequential = sequential;
796 td->ioprio = 0;
43000118
JA
797 td->use_aio = 0;
798 td->aio_depth = 0;
799 td->aio_cur_depth = 0;
f737299d
JA
800 memcpy(&td->cpumask, &def_cpumask, sizeof(td->cpumask));
801
802 return td;
803}
804
4240cfa1
JA
805static void put_job(struct thread_data *td)
806{
807 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
808 thread_number--;
809}
810
5c24b2c4
JA
811static int add_job(struct thread_data *td, const char *filename, int prioclass,
812 int prio)
f737299d
JA
813{
814 strcpy(td->file_name, filename);
892199bd 815 td->stat_fd = -1;
4240cfa1 816 sem_init(&td->mutex, 1, 0);
892199bd 817 td->min_latency = 10000000;
f737299d
JA
818 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
819
43000118
JA
820 if (td->use_aio && !td->aio_depth)
821 td->aio_depth = 1;
822
4240cfa1
JA
823 if (setup_rate(td))
824 return -1;
f737299d 825
43000118 826 printf("Client%d: file=%s, rw=%d, prio=%d, seq=%d, odir=%d, bs=%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, filename, td->ddir, td->ioprio, td->sequential, td->odirect, td->bs, td->rate, td->use_aio, td->aio_depth);
4240cfa1 827 return 0;
892199bd
JA
828}
829
18e0b78c
JA
/*
 * Turn the bits of "cpu" into a cpu set: bit N set means cpu N is part of
 * the set. The previous contents of *cpumask are discarded.
 */
static void fill_cpu_mask_ptr(cpu_set_t *cpumask, int cpu)
{
	unsigned int i;

	CPU_ZERO(cpumask);

	for (i = 0; i < sizeof(int) * 8; i++) {
		if ((1U << i) & (unsigned int) cpu)
			CPU_SET(i, cpumask);
	}
}

/*
 * Callers pass the mask itself (e.g. td->cpumask). With a by-value
 * parameter only a copy would be filled in, so the call is routed through
 * a macro that takes the address of the caller's mask.
 */
#define fill_cpu_mask(cpumask, cpu)	fill_cpu_mask_ptr(&(cpumask), (cpu))
841
/*
 * Copy one option value from input to output. The value ends at the first
 * ',' or '}' or at the end of the string; output is always terminated.
 * output must be large enough for the value, no bound is checked here.
 */
static void fill_option(const char *input, char *output)
{
	const char *src = input;
	char *dst = output;

	for (; *src != '\0' && *src != ',' && *src != '}'; src++, dst++)
		*dst = *src;

	*dst = '\0';
}
854
855/*
856 * job key words:
857 *
858 * file=
859 * bs=
860 * rw=
861 * direct=
862 */
5c24b2c4 863static void parse_jobs_cmd(int argc, char *argv[], int index)
892199bd 864{
f737299d
JA
865 struct thread_data *td;
866 unsigned int prio, prioclass, cpu;
892199bd
JA
867 char *string, *filename, *p, *c;
868 int i;
869
870 string = malloc(256);
871 filename = malloc(256);
872
873 for (i = index; i < argc; i++) {
874 p = argv[i];
875
876 c = strpbrk(p, "{");
877 if (!c)
878 break;
879
880 filename[0] = 0;
4240cfa1 881
f737299d 882 td = get_new_job();
4240cfa1
JA
883 if (!td)
884 break;
f737299d 885
892199bd 886 prioclass = 2;
f737299d 887 prio = 4;
892199bd
JA
888
889 c = strstr(p, "rw=");
890 if (c) {
891 c += 3;
892 if (*c == '0')
f737299d 893 td->ddir = DDIR_READ;
892199bd 894 else
f737299d 895 td->ddir = DDIR_WRITE;
892199bd
JA
896 }
897
898 c = strstr(p, "prio=");
899 if (c) {
900 c += 5;
901 prio = *c - '0';
902 }
903
904 c = strstr(p, "prioclass=");
905 if (c) {
906 c += 10;
907 prioclass = *c - '0';
908 }
909
910 c = strstr(p, "file=");
911 if (c) {
912 c += 5;
913 fill_option(c, filename);
914 }
915
916 c = strstr(p, "bs=");
917 if (c) {
918 c += 3;
919 fill_option(c, string);
f737299d
JA
920 td->bs = strtoul(string, NULL, 10);
921 td->bs <<= 10;
892199bd
JA
922 }
923
924 c = strstr(p, "direct=");
925 if (c) {
926 c += 7;
927 if (*c != '0')
f737299d 928 td->odirect = 1;
892199bd 929 else
f737299d 930 td->odirect = 0;
892199bd
JA
931 }
932
933 c = strstr(p, "delay=");
934 if (c) {
935 c += 6;
936 fill_option(c, string);
f737299d 937 td->delay_sleep = strtoul(string, NULL, 10);
892199bd
JA
938 }
939
86184d14
JA
940 c = strstr(p, "rate=");
941 if (c) {
942 c += 5;
943 fill_option(c, string);
f737299d 944 td->rate = strtoul(string, NULL, 10);
86184d14
JA
945 }
946
4240cfa1
JA
947 c = strstr(p, "ratemin=");
948 if (c) {
949 c += 8;
950 fill_option(c, string);
951 td->ratemin = strtoul(string, NULL, 10);
952 }
953
954 c = strstr(p, "ratecycle=");
955 if (c) {
956 c += 10;
957 fill_option(c, string);
958 td->ratecycle = strtoul(string, NULL, 10);
959 }
960
18e0b78c
JA
961 c = strstr(p, "cpumask=");
962 if (c) {
963 c += 8;
964 fill_option(c, string);
965 cpu = strtoul(string, NULL, 10);
f737299d 966 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
967 }
968
4240cfa1
JA
969 c = strstr(p, "fsync=");
970 if (c) {
971 c += 6;
972 fill_option(c, string);
973 td->fsync_blocks = strtoul(string, NULL, 10);
974 }
18e0b78c 975
fc24389f
JA
976 c = strstr(p, "startdelay=");
977 if (c) {
978 c += 11;
979 fill_option(c, string);
980 td->start_delay = strtoul(string, NULL, 10);
981 }
982
43000118
JA
983 c = strstr(p, "aio_depth=");
984 if (c) {
985 c += 10;
986 fill_option(c, string);
987 td->aio_depth = strtoul(string, NULL, 10);
988 }
989
990 c = strstr(p, "aio");
991 if (c)
992 td->use_aio = 1;
993
892199bd
JA
994 c = strstr(p, "random");
995 if (c)
f737299d 996 td->sequential = 0;
892199bd
JA
997 c = strstr(p, "sequential");
998 if (c)
f737299d 999 td->sequential = 1;
892199bd 1000
4240cfa1
JA
1001 if (add_job(td, filename, prioclass, prio))
1002 put_job(td);
892199bd
JA
1003 }
1004
7dd1389e
JA
1005 free(string);
1006 free(filename);
892199bd
JA
1007}
1008
/*
 * Try to read "<name>=<number>" or "<name> = <number>" from the start of
 * p and store the number in *val.
 *
 * Returns 0 if the option matched, 1 if it did not (note the inverted
 * sense).
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char str[128];
	int matched;

	sprintf(str, "%s=%%d", name);
	matched = sscanf(p, str, val);

	if (matched != 1) {
		sprintf(str, "%s = %%d", name);
		matched = sscanf(p, str, val);
	}

	return matched == 1 ? 0 : 1;
}
1023
/*
 * Returns 1 if line holds nothing but white space and control characters
 * (an empty string included), 0 otherwise.
 */
static int is_empty(char *line)
{
	const unsigned char *c;

	/* the ctype functions must be fed unsigned char values */
	for (c = (const unsigned char *) line; *c; c++)
		if (!isspace(*c) && !iscntrl(*c))
			return 0;

	return 1;
}
1034
5c24b2c4 1035static int parse_jobs_ini(char *file)
7dd1389e 1036{
4240cfa1 1037 unsigned int prioclass, prio, cpu;
f737299d 1038 struct thread_data *td;
7dd1389e
JA
1039 char *string, *name;
1040 fpos_t off;
1041 FILE *f;
1042 char *p;
1043
1044 f = fopen(file, "r");
1045 if (!f) {
1046 perror("fopen");
4240cfa1 1047 return 1;
7dd1389e
JA
1048 }
1049
1050 string = malloc(4096);
1051 name = malloc(256);
1052
7dd1389e
JA
1053 while ((p = fgets(string, 4096, f)) != NULL) {
1054 if (sscanf(p, "[%s]", name) != 1)
1055 continue;
1056
1057 name[strlen(name) - 1] = '\0';
1058
f737299d 1059 td = get_new_job();
4240cfa1
JA
1060 if (!td)
1061 break;
f737299d 1062
7dd1389e 1063 prioclass = 2;
f737299d 1064 prio = 4;
7dd1389e
JA
1065
1066 fgetpos(f, &off);
1067 while ((p = fgets(string, 4096, f)) != NULL) {
1068 if (is_empty(p))
e6402082
JA
1069 continue;
1070 if (strstr(p, "["))
7dd1389e 1071 break;
f737299d
JA
1072 if (!check_int(p, "bs", &td->bs)) {
1073 td->bs <<= 10;
7dd1389e
JA
1074 fgetpos(f, &off);
1075 continue;
1076 }
f737299d 1077 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
1078 fgetpos(f, &off);
1079 continue;
1080 }
1081 if (!check_int(p, "prio", &prio)) {
1082 fgetpos(f, &off);
1083 continue;
1084 }
1085 if (!check_int(p, "prioclass", &prioclass)) {
1086 fgetpos(f, &off);
1087 continue;
1088 }
f737299d 1089 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
1090 fgetpos(f, &off);
1091 continue;
1092 }
f737299d 1093 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
1094 fgetpos(f, &off);
1095 continue;
1096 }
4240cfa1
JA
1097 if (!check_int(p, "ratemin", &td->ratemin)) {
1098 fgetpos(f, &off);
1099 continue;
1100 }
1101 if (!check_int(p, "ratecycle", &td->ratecycle)) {
1102 fgetpos(f, &off);
1103 continue;
1104 }
f737299d 1105 if (!check_int(p, "delay", &td->delay_sleep)) {
7dd1389e
JA
1106 fgetpos(f, &off);
1107 continue;
1108 }
18e0b78c 1109 if (!check_int(p, "cpumask", &cpu)) {
f737299d 1110 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1111 fgetpos(f, &off);
1112 continue;
1113 }
4240cfa1
JA
1114 if (!check_int(p, "fsync", &td->fsync_blocks)) {
1115 fgetpos(f, &off);
1116 continue;
1117 }
fc24389f
JA
1118 if (!check_int(p, "startdelay", &td->start_delay)) {
1119 fgetpos(f, &off);
1120 continue;
1121 }
43000118
JA
1122 if (!check_int(p, "aio_depth", &td->aio_depth)) {
1123 fgetpos(f, &off);
1124 continue;
1125 }
1126 if (!strncmp(p, "sequential", 10)) {
f737299d 1127 td->sequential = 1;
7dd1389e
JA
1128 fgetpos(f, &off);
1129 continue;
1130 }
43000118 1131 if (!strncmp(p, "random", 6)) {
f737299d 1132 td->sequential = 0;
7dd1389e
JA
1133 fgetpos(f, &off);
1134 continue;
1135 }
43000118
JA
1136 if (!strncmp(p, "aio", 3)) {
1137 td->use_aio = 1;
1138 fgetpos(f, &off);
1139 continue;
1140 }
1141
e6402082 1142 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
1143 }
1144 fsetpos(f, &off);
1145
4240cfa1
JA
1146 if (add_job(td, name, prioclass, prio))
1147 put_job(td);
7dd1389e
JA
1148 }
1149
1150 free(string);
1151 free(name);
fc7d63df 1152 fclose(f);
4240cfa1 1153 return 0;
7dd1389e
JA
1154}
1155
5c24b2c4 1156static int parse_options(int argc, char *argv[])
892199bd 1157{
01c4d8de 1158 int i;
892199bd
JA
1159
1160 for (i = 1; i < argc; i++) {
1161 char *parm = argv[i];
1162
1163 if (parm[0] != '-')
1164 break;
1165
1166 parm++;
1167 switch (*parm) {
1168 case 's':
1169 parm++;
1170 sequential = !!atoi(parm);
1171 break;
1172 case 'b':
1173 parm++;
1174 global_bs = atoi(parm);
1175 global_bs <<= 10;
4240cfa1
JA
1176 if (!global_bs) {
1177 printf("bad block size\n");
1178 global_bs = DEF_BS;
1179 }
892199bd
JA
1180 break;
1181 case 't':
1182 parm++;
1183 timeout = atoi(parm);
1184 break;
1185 case 'w':
1186 parm++;
1187 write_stat = !!atoi(parm);
1188 break;
1189 case 'r':
1190 parm++;
1191 repeatable = !!atoi(parm);
1192 break;
02bdd9ba
JA
1193 case 'R':
1194 parm++;
1195 rate_quit = !!atoi(parm);
1196 break;
892199bd
JA
1197 case 'o':
1198 parm++;
1199 odirect = !!atoi(parm);
1200 break;
7dd1389e
JA
1201 case 'f':
1202 if (i + 1 >= argc) {
1203 printf("-f needs file as arg\n");
1204 break;
1205 }
1206 ini_file = strdup(argv[i+1]);
1207 break;
892199bd 1208 default:
7dd1389e 1209 printf("bad option %s\n", argv[i]);
892199bd
JA
1210 break;
1211 }
1212 }
1213
892199bd
JA
1214 return i;
1215}
1216
213b446c 1217static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1218{
213b446c 1219 int i;
02bdd9ba
JA
1220
1221 for (i = 0; i < thread_number; i++) {
1222 struct thread_data *td = &threads[i];
1223
213b446c
JA
1224 if (td->runstate != TD_EXITED)
1225 continue;
02bdd9ba 1226
213b446c
JA
1227 td->runstate = TD_REAPED;
1228 waitpid(td->pid, NULL, 0);
1229 (*nr_running)--;
1230 (*m_rate) -= td->ratemin;
1231 (*t_rate) -= td->rate;
e6402082
JA
1232
1233 if (td->terminate)
1234 continue;
1235
213b446c
JA
1236 printf("Threads now running: %d", *nr_running);
1237 if (*m_rate || *t_rate)
1238 printf(", rate %d/%dKiB/sec", *t_rate, *m_rate);
1239 printf("\n");
1240 }
02bdd9ba
JA
1241}
1242
fc24389f
JA
1243static void run_threads(char *argv[])
1244{
1245 struct timeval genesis, now;
1246 struct thread_data *td;
1247 unsigned long spent;
213b446c 1248 int i, todo, nr_running, m_rate, t_rate;
fc24389f
JA
1249
1250 gettimeofday(&genesis, NULL);
1251
1252 printf("Starting %d threads\n", thread_number);
1253 fflush(stdout);
1254
1255 if (timeout) {
1256 signal(SIGALRM, sig_handler);
1257 alarm(timeout);
1258 }
1259
1260 todo = thread_number;
02bdd9ba 1261 nr_running = 0;
213b446c 1262 m_rate = t_rate = 0;
fc24389f 1263
213b446c 1264 while (todo) {
fc24389f
JA
1265 for (i = 0; i < thread_number; i++) {
1266 td = &threads[i];
1267
02bdd9ba 1268 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1269 continue;
1270
213b446c
JA
1271 /*
1272 * never got a chance to start, killed by other
1273 * thread for some reason
1274 */
1275 if (td->terminate) {
1276 todo--;
1277 continue;
1278 }
1279
fc24389f
JA
1280 if (td->start_delay) {
1281 gettimeofday(&now, NULL);
1282 spent = mtime_since(&genesis, &now);
1283
1284 if (td->start_delay * 1000 > spent)
1285 continue;
1286 }
1287
02bdd9ba 1288 td->runstate = TD_CREATED;
fc24389f
JA
1289 sem_init(&startup_sem, 1, 1);
1290 todo--;
1291
1292 if (fork())
1293 sem_wait(&startup_sem);
1294 else {
1295 thread_main(shm_id, i, argv);
1296 exit(0);
1297 }
1298 }
1299
1300 for (i = 0; i < thread_number; i++) {
1301 struct thread_data *td = &threads[i];
1302
02bdd9ba
JA
1303 if (td->runstate == TD_CREATED) {
1304 td->runstate = TD_STARTED;
1305 nr_running++;
213b446c
JA
1306 m_rate += td->ratemin;
1307 t_rate += td->rate;
fc24389f 1308 sem_post(&td->mutex);
213b446c
JA
1309
1310 printf("Threads now running: %d", nr_running);
1311 if (m_rate || t_rate)
1312 printf(", rate %d/%dKiB/sec", t_rate, m_rate);
1313 printf("\n");
fc24389f
JA
1314 }
1315 }
1316
213b446c 1317 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1318
fc24389f
JA
1319 if (todo)
1320 usleep(100000);
1321 }
02bdd9ba
JA
1322
1323 while (nr_running) {
213b446c 1324 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1325 usleep(10000);
1326 }
fc24389f
JA
1327}
1328
892199bd
JA
1329int main(int argc, char *argv[])
1330{
1331 static unsigned long max_run[2], min_run[2], total_blocks[2];
1332 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
1333 static unsigned long read_mb, write_mb, read_agg, write_agg;
4240cfa1 1334 int i;
18e0b78c 1335
4240cfa1 1336 shm_id = shmget(0, MAX_JOBS * sizeof(struct thread_data), IPC_CREAT | 0600);
892199bd
JA
1337 if (shm_id == -1) {
1338 perror("shmget");
1339 return 1;
1340 }
1341
1342 threads = shmat(shm_id, NULL, 0);
86184d14
JA
1343 if (threads == (void *) -1 ) {
1344 perror("shmat");
1345 return 1;
1346 }
892199bd
JA
1347
1348 atexit(free_shm);
1349
4240cfa1
JA
1350 if (sched_getaffinity(getpid(), sizeof(def_cpumask), &def_cpumask) == -1) {
1351 perror("sched_getaffinity");
1352 return 1;
1353 }
1354
892199bd 1355 i = parse_options(argc, argv);
7dd1389e 1356
4240cfa1
JA
1357 if (ini_file) {
1358 if (parse_jobs_ini(ini_file))
1359 return 1;
1360 } else
1361 parse_jobs_cmd(argc, argv, i);
7dd1389e 1362
4240cfa1
JA
1363 if (!thread_number) {
1364 printf("Nothing to do\n");
1365 return 1;
1366 }
7dd1389e
JA
1367
1368 printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
1369
fc24389f 1370 run_threads(argv);
892199bd 1371
892199bd
JA
1372 min_bw[0] = min_run[0] = ~0UL;
1373 min_bw[1] = min_run[1] = ~0UL;
1374 minl[0] = minl[1] = ~0UL;
1375 for (i = 0; i < thread_number; i++) {
1376 struct thread_data *td = &threads[i];
1377 unsigned long bw = 0;
1378
1379 if (td->error)
7dd1389e 1380 goto show_stat;
892199bd
JA
1381
1382 if (td->runtime < min_run[td->ddir])
1383 min_run[td->ddir] = td->runtime;
1384 if (td->runtime > max_run[td->ddir])
1385 max_run[td->ddir] = td->runtime;
1386
1387 if (td->runtime)
4240cfa1 1388 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1389 if (bw < min_bw[td->ddir])
1390 min_bw[td->ddir] = bw;
1391 if (bw > max_bw[td->ddir])
1392 max_bw[td->ddir] = bw;
1393 if (td->max_latency < minl[td->ddir])
1394 minl[td->ddir] = td->max_latency;
1395 if (td->max_latency > maxl[td->ddir])
1396 maxl[td->ddir] = td->max_latency;
1397
4240cfa1 1398 total_blocks[td->ddir] += td->io_blocks;
892199bd
JA
1399
1400 if (td->ddir == DDIR_READ) {
4240cfa1 1401 read_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1402 if (td->runtime)
4240cfa1 1403 read_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1404 }
1405 if (td->ddir == DDIR_WRITE) {
4240cfa1 1406 write_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1407 if (td->runtime)
4240cfa1 1408 write_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1409 }
1410
7dd1389e 1411show_stat:
892199bd
JA
1412 show_thread_status(td);
1413 }
1414
1415 printf("Run status:\n");
1416 if (max_run[DDIR_READ])
1417 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
1418 if (max_run[DDIR_WRITE])
1419 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 1420
892199bd
JA
1421 return 0;
1422}