[PATCH] fio: kill max_depth print and fix for error handling in async io
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
892199bd
JA
32#include <sys/time.h>
33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/wait.h>
36#include <semaphore.h>
37#include <sys/ipc.h>
38#include <sys/shm.h>
39#include <asm/unistd.h>
40
4240cfa1
JA
41#define MAX_JOBS (1024)
42
892199bd
JA
43/*
44 * assume we don't have _get either, if _set isn't defined
45 */
46#ifndef __NR_ioprio_set
47
48#if defined(__i386__)
49#define __NR_ioprio_set 289
50#define __NR_ioprio_get 290
51#elif defined(__powerpc__) || defined(__powerpc64__)
52#define __NR_ioprio_set 273
53#define __NR_ioprio_get 274
54#elif defined(__x86_64__)
55#define __NR_ioprio_set 251
56#define __NR_ioprio_get 252
57#elif defined(__ia64__)
58#define __NR_ioprio_set 1274
59#define __NR_ioprio_get 1275
60#elif defined(__alpha__)
61#define __NR_ioprio_set 442
62#define __NR_ioprio_get 443
63#elif defined(__s390x__) || defined(__s390__)
64#define __NR_ioprio_set 282
65#define __NR_ioprio_get 283
66#else
67#error "Unsupported arch"
68#endif
69
70#endif
71
72static int ioprio_set(int which, int who, int ioprio)
73{
74 return syscall(__NR_ioprio_set, which, who, ioprio);
75}
76
/*
 * values for the 'which' argument of ioprio_set()
 */
enum {
	IOPRIO_WHO_PROCESS	= 1,
	IOPRIO_WHO_PGRP		= 2,
	IOPRIO_WHO_USER		= 3,
};
82
83#define IOPRIO_CLASS_SHIFT 13
84
892199bd
JA
85#define MASK (4095)
86
4240cfa1
JA
87#define DEF_BS (4096)
88#define DEF_TIMEOUT (30)
89#define DEF_RATE_CYCLE (1000)
90#define DEF_ODIRECT (1)
91#define DEF_SEQUENTIAL (1)
92#define DEF_WRITESTAT (0)
93#define DEF_RAND_REPEAT (1)
94
95#define ALIGN(buf) (char *) (((unsigned long) (buf) + MASK) & ~(MASK))
892199bd 96
4240cfa1
JA
97static int sequential = DEF_SEQUENTIAL;
98static int write_stat = DEF_WRITESTAT;
99static int repeatable = DEF_RAND_REPEAT;
100static int timeout = DEF_TIMEOUT;
101static int odirect = DEF_ODIRECT;
102static int global_bs = DEF_BS;
02bdd9ba 103static int rate_quit = 1;
892199bd 104
892199bd 105static int thread_number;
7dd1389e 106static char *ini_file;
892199bd
JA
107
108static int shm_id;
109
18e0b78c
JA
110static cpu_set_t def_cpumask;
111
4240cfa1
JA
/*
 * data direction of a job, stored in thread_data->ddir
 */
enum {
	DDIR_READ	= 0,
	DDIR_WRITE	= 1,
};
892199bd 116
02bdd9ba
JA
/*
 * thread life cycle, stored in thread_data->runstate
 */
enum {
	TD_NOT_CREATED	= 0,	/* job defined, nothing forked yet */
	TD_CREATED	= 1,	/* set just before the job is forked */
	TD_STARTED	= 2,	/* job mutex posted, I/O may begin */
	TD_EXITED	= 3,	/* set by the job itself when it is done */
	TD_REAPED	= 4,	/* set once the exit has been collected */
};
127
892199bd
JA
128struct thread_data {
129 char file_name[256];
130 int thread_number;
131 int error;
132 int fd;
133 int stat_fd;
134 pid_t pid;
4240cfa1 135 volatile int terminate;
02bdd9ba 136 volatile int runstate;
f737299d
JA
137 unsigned int ddir;
138 unsigned int ioprio;
139 unsigned int sequential;
140 unsigned int bs;
141 unsigned int odirect;
142 unsigned int delay_sleep;
4240cfa1 143 unsigned int fsync_blocks;
fc24389f 144 unsigned int start_delay;
43000118 145 unsigned int use_aio;
18e0b78c 146 cpu_set_t cpumask;
86184d14 147
43000118
JA
148 io_context_t *aio_ctx;
149 struct iocb *aio_iocbs;
150 unsigned int aio_depth;
151 unsigned int aio_cur_depth;
152 struct io_event *aio_events;
153 char *aio_iocbs_status;
154
7dd1389e 155 unsigned int rate;
4240cfa1
JA
156 unsigned int ratemin;
157 unsigned int ratecycle;
158 unsigned long rate_usec_cycle;
159 long rate_pending_usleep;
160 unsigned long rate_blocks;
161 struct timeval lastrate;
86184d14 162
892199bd
JA
163 unsigned long max_latency; /* msec */
164 unsigned long min_latency; /* msec */
165 unsigned long runtime; /* sec */
166 unsigned long blocks;
4240cfa1 167 unsigned long io_blocks;
892199bd
JA
168 unsigned long last_block;
169 sem_t mutex;
892199bd
JA
170 struct drand48_data random_state;
171
172 /*
173 * bandwidth stat
174 */
175 unsigned long stat_time;
176 unsigned long stat_time_last;
177 unsigned long stat_blocks_last;
4240cfa1
JA
178
179 struct timeval start;
892199bd
JA
180};
181
182static struct thread_data *threads;
892199bd
JA
183
184static sem_t startup_sem;
185
5c24b2c4 186static void sig_handler(int sig)
892199bd
JA
187{
188 int i;
189
213b446c
JA
190 for (i = 0; i < thread_number; i++) {
191 struct thread_data *td = &threads[i];
192
193 td->terminate = 1;
194 td->start_delay = 0;
195 }
02bdd9ba
JA
196}
197
5c24b2c4 198static int init_random_state(struct thread_data *td)
892199bd
JA
199{
200 unsigned long seed = 123;
201
202 if (td->sequential)
203 return 0;
204
205 if (!repeatable) {
206 int fd = open("/dev/random", O_RDONLY);
207
208 if (fd == -1) {
209 td->error = errno;
210 return 1;
211 }
212
7dd1389e 213 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
892199bd
JA
214 td->error = EIO;
215 close(fd);
216 return 1;
217 }
218
219 close(fd);
220 }
221
222 srand48_r(seed, &td->random_state);
223 return 0;
224}
225
5c24b2c4 226static void shutdown_stat_file(struct thread_data *td)
892199bd
JA
227{
228 if (td->stat_fd != -1) {
229 fsync(td->stat_fd);
230 close(td->stat_fd);
231 }
232}
233
5c24b2c4 234static int init_stat_file(struct thread_data *td)
892199bd 235{
4240cfa1 236 char n[256];
892199bd
JA
237
238 if (!write_stat)
239 return 0;
240
892199bd
JA
241 sprintf(n, "%s.stat", td->file_name);
242 td->stat_fd = open(n, O_WRONLY | O_CREAT | O_TRUNC, 0644);
243 if (td->stat_fd == -1) {
892199bd
JA
244 td->error = errno;
245 return 1;
246 }
247
892199bd
JA
248 return 0;
249}
250
/*
 * Microseconds elapsed between 's' (start) and 'e' (end).
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	double secs = e->tv_sec - s->tv_sec;
	double usecs = e->tv_usec - s->tv_usec;

	/* borrow a second when the microsecond part went negative */
	if (secs > 0 && usecs < 0) {
		secs -= 1;
		usecs += 1000000;
	}

	return secs * (double) 1000000 + usecs;
}
266
/*
 * Milliseconds elapsed between 's' (start) and 'e' (end); the fraction of
 * a millisecond is dropped.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	double secs = e->tv_sec - s->tv_sec;
	double usecs = e->tv_usec - s->tv_usec;

	/* borrow a second when the microsecond part went negative */
	if (secs > 0 && usecs < 0) {
		secs -= 1;
		usecs += 1000000;
	}

	return secs * (double) 1000 + usecs / (double) 1000;
}
283
5c24b2c4 284static unsigned long get_next_offset(struct thread_data *td)
892199bd
JA
285{
286 unsigned long b;
287 long r;
288
289 if (!td->sequential) {
290 lrand48_r(&td->random_state, &r);
291 b = (1+(double) (td->blocks-1) * r / (RAND_MAX+1.0));
292 } else {
293 b = td->last_block;
294 td->last_block++;
295 }
296
297 return b * td->bs;
298}
299
5c24b2c4 300static void add_stat_sample(struct thread_data *td, unsigned long msec)
892199bd
JA
301{
302 char sample[256];
303
304 if (!td->stat_fd)
305 return;
306
307#if 0
4240cfa1 308 sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
892199bd
JA
309 write(td->stat_fd, sample, strlen(sample));
310#else
311 td->stat_time += msec;
312 td->stat_time_last += msec;
313 td->stat_blocks_last++;
314
315 if (td->stat_time_last >= 500) {
316 unsigned long rate = td->stat_blocks_last * td->bs / (td->stat_time_last);
317
318 td->stat_time_last = 0;
319 td->stat_blocks_last = 0;
320 sprintf(sample, "%lu, %lu\n", td->stat_time, rate);
4240cfa1 321 //sprintf(sample, "%lu, %lu\n", td->io_blocks, msec);
892199bd
JA
322 write(td->stat_fd, sample, strlen(sample));
323 }
324#endif
325}
326
/*
 * Sleep for 'usec' microseconds, resuming after interruption by a signal.
 *
 * The request is split into seconds and nanoseconds: nanosleep() rejects a
 * tv_nsec of one second or more, so sleeps of >= 1000000 usec used to
 * return at once, and usec * 1000 could overflow an int.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	if (usec <= 0)
		return;

	req.tv_sec = usec / 1000000;
	req.tv_nsec = (long) (usec % 1000000) * 1000;

	/* on EINTR 'rem' holds what is left to sleep */
	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
		req = rem;
}
341
5c24b2c4 342static void rate_throttle(struct thread_data *td, unsigned long time_spent)
86184d14 343{
4240cfa1
JA
344 if (!td->rate)
345 return;
346
86184d14
JA
347 if (time_spent < td->rate_usec_cycle) {
348 unsigned long s = td->rate_usec_cycle - time_spent;
349
350 td->rate_pending_usleep += s;
fad86e6a 351 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
352 usec_sleep(td->rate_pending_usleep);
353 td->rate_pending_usleep = 0;
354 }
4240cfa1 355 } else {
42b2b9fe
JA
356 long overtime = time_spent - td->rate_usec_cycle;
357
4240cfa1
JA
358 td->rate_pending_usleep -= overtime;
359 }
360}
361
5c24b2c4 362static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1
JA
363{
364 unsigned long spent = mtime_since(&td->start, now);
365 unsigned long rate;
366
367 /*
368 * allow a 2 second settle period in the beginning
369 */
370 if (spent < 2000)
371 return 0;
372
373 /*
374 * if rate blocks is set, sample is running
375 */
376 if (td->rate_blocks) {
377 spent = mtime_since(&td->lastrate, now);
378 if (spent < td->ratecycle)
379 return 0;
380
381 rate = ((td->io_blocks - td->rate_blocks) * td->bs) / spent;
382 if (rate < td->ratemin) {
383 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 384 if (rate_quit)
e6402082 385 sig_handler(0);
4240cfa1
JA
386 return 1;
387 }
86184d14 388 }
4240cfa1
JA
389
390 td->rate_blocks = td->io_blocks;
391 memcpy(&td->lastrate, now, sizeof(*now));
392 return 0;
86184d14
JA
393}
394
4240cfa1
JA
395#define should_fsync(td) ((td)->ddir == DDIR_WRITE && !(td)->odirect)
396
43000118 397static void do_sync_io(struct thread_data *td)
892199bd 398{
4240cfa1 399 struct timeval s, e;
892199bd 400 char *buffer, *ptr;
86184d14 401 unsigned long blocks, msec, usec;
892199bd 402
4240cfa1
JA
403 ptr = malloc(td->bs + MASK);
404 buffer = ALIGN(ptr);
892199bd 405
4240cfa1
JA
406 gettimeofday(&td->start, NULL);
407
408 if (td->ratemin)
409 memcpy(&td->lastrate, &td->start, sizeof(td->start));
892199bd
JA
410
411 for (blocks = 0; blocks < td->blocks; blocks++) {
412 off_t offset = get_next_offset(td);
413 int ret;
414
415 if (td->terminate)
416 break;
417
418 if (lseek(td->fd, offset, SEEK_SET) == -1) {
419 td->error = errno;
420 break;
421 }
422
423 if (td->delay_sleep)
86184d14 424 usec_sleep(td->delay_sleep);
892199bd
JA
425
426 gettimeofday(&s, NULL);
427
428 if (td->ddir == DDIR_READ)
429 ret = read(td->fd, buffer, td->bs);
430 else
431 ret = write(td->fd, buffer, td->bs);
432
f737299d 433 if (ret < (int) td->bs) {
892199bd
JA
434 if (ret == -1)
435 td->error = errno;
436 break;
437 }
438
4240cfa1
JA
439 td->io_blocks++;
440
441 if (should_fsync(td) && td->fsync_blocks &&
442 (td->io_blocks % td->fsync_blocks) == 0)
443 fsync(td->fd);
444
86184d14
JA
445 gettimeofday(&e, NULL);
446
447 usec = utime_since(&s, &e);
86184d14 448
4240cfa1 449 rate_throttle(td, usec);
892199bd 450
4240cfa1
JA
451 if (check_min_rate(td, &e)) {
452 td->error = ENODATA;
453 break;
454 }
892199bd 455
4240cfa1
JA
456 msec = usec / 1000;
457 add_stat_sample(td, msec);
892199bd
JA
458
459 if (msec < td->min_latency)
460 td->min_latency = msec;
461 if (msec > td->max_latency)
462 td->max_latency = msec;
463 }
464
4240cfa1 465 if (should_fsync(td))
892199bd
JA
466 fsync(td->fd);
467
468 gettimeofday(&e, NULL);
4240cfa1 469 td->runtime = mtime_since(&td->start, &e);
892199bd
JA
470
471 free(ptr);
472}
43000118
JA
473
474static void aio_put_iocb(struct thread_data *td, struct iocb *iocb)
475{
476 long offset = ((long) iocb - (long) td->aio_iocbs)/ sizeof(struct iocb);
477
478 td->aio_iocbs_status[offset] = 0;
479}
480
481static struct iocb *aio_get_iocb(struct thread_data *td, char *buffer)
482{
483 struct iocb *iocb = NULL;
484 int i;
485
486 for (i = 0; i < td->aio_depth; i++) {
487 if (td->aio_iocbs_status[i] == 0) {
488 td->aio_iocbs_status[i] = 1;
489 iocb = &td->aio_iocbs[i];
490 break;
491 }
492 }
493
494 if (iocb) {
495 off_t off = get_next_offset(td);
496 char *p = buffer + i * td->bs;
497
498 if (td->ddir == DDIR_READ)
499 io_prep_pread(iocb, td->fd, p, td->bs, off);
500 else
501 io_prep_pwrite(iocb, td->fd, p, td->bs, off);
502 }
503
504 return iocb;
505}
506
507static void do_async_io(struct thread_data *td)
508{
509 struct timeval s, e;
510 char *buf, *ptr;
511 unsigned long blocks, msec, usec;
43000118
JA
512
513 ptr = malloc(td->bs * td->aio_depth + MASK);
514 buf = ALIGN(ptr);
515
516 gettimeofday(&td->start, NULL);
517
518 if (td->ratemin)
519 memcpy(&td->lastrate, &td->start, sizeof(td->start));
520
521 for (blocks = 0; blocks < td->blocks; blocks++) {
522 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
523 struct timespec *timeout;
43000118 524 int ret, i, min_evts = 0;
8baf1bcc 525 struct iocb *iocb;
43000118
JA
526
527 if (td->terminate)
528 break;
529
530 if (td->delay_sleep)
531 usec_sleep(td->delay_sleep);
532
533 gettimeofday(&s, NULL);
534
8baf1bcc
JA
535 iocb = aio_get_iocb(td, buf);
536
43000118
JA
537 ret = io_submit(*td->aio_ctx, 1, &iocb);
538 if (ret < 0) {
539 td->error = errno;
540 break;
541 }
542
543 td->aio_cur_depth++;
43000118
JA
544
545 if (td->aio_cur_depth < td->aio_depth) {
546 timeout = &ts;
547 min_evts = 0;
548 } else {
549 timeout = NULL;
550 min_evts = 1;
551 }
552
553 ret = io_getevents(*td->aio_ctx, min_evts, td->aio_cur_depth, td->aio_events, timeout);
554 if (ret < 0) {
555 td->error = errno;
556 break;
557 } else if (!ret)
558 continue;
559
560 for (i = 0; i < ret; i++) {
561 struct io_event *ev = td->aio_events + i;
562
563 td->io_blocks++;
564 td->aio_cur_depth--;
565
566 iocb = ev->obj;
567 aio_put_iocb(td, iocb);
568 }
569
570 gettimeofday(&e, NULL);
571
572 usec = utime_since(&s, &e);
573
574 rate_throttle(td, usec);
575
576 if (check_min_rate(td, &e)) {
577 td->error = ENODATA;
578 break;
579 }
580
581 msec = usec / 1000;
582 add_stat_sample(td, msec);
583
584 if (msec < td->min_latency)
585 td->min_latency = msec;
586 if (msec > td->max_latency)
587 td->max_latency = msec;
588 }
589
590 gettimeofday(&e, NULL);
591 td->runtime = mtime_since(&td->start, &e);
592
593 free(ptr);
594}
595
596static void cleanup_aio(struct thread_data *td)
597{
598 /*
599 * flush pending events
600 */
601 if (td->aio_cur_depth)
602 io_getevents(*td->aio_ctx, td->aio_cur_depth, td->aio_cur_depth, td->aio_events, NULL);
603
604 if (td->aio_ctx) {
605 io_destroy(*td->aio_ctx);
606 free(td->aio_ctx);
607 }
608 if (td->aio_iocbs)
609 free(td->aio_iocbs);
610 if (td->aio_events)
611 free(td->aio_events);
612 if (td->aio_iocbs_status)
613 free(td->aio_iocbs_status);
614}
615
616static int init_aio(struct thread_data *td)
617{
618 td->aio_ctx = malloc(sizeof(*td->aio_ctx));
619
620 if (io_queue_init(td->aio_depth, td->aio_ctx)) {
621 td->error = errno;
622 return 1;
623 }
624
625 td->aio_iocbs = malloc(td->aio_depth * sizeof(struct iocb));
626 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
627 td->aio_iocbs_status = malloc(td->aio_depth * sizeof(char));
628 return 0;
629}
630
5c24b2c4 631static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
632{
633 struct thread_data *td;
634 void *data;
4240cfa1 635 struct stat st;
892199bd
JA
636 int ret = 1, flags;
637
638 data = shmat(shm_id, NULL, 0);
639 td = data + offset * sizeof(struct thread_data);
640 td->pid = getpid();
641
18e0b78c
JA
642 td->fd = -1;
643
644 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
645 td->error = errno;
646 goto err;
647 }
648
43000118 649 printf("Thread (%s) (pid=%u) (f=%s) (aio=%d) started\n", td->ddir == DDIR_READ ? "read" : "write", td->pid, td->file_name, td->use_aio);
892199bd
JA
650 fflush(stdout);
651
4240cfa1 652 sprintf(argv[0], "fio%d", offset);
892199bd 653
7dd1389e 654 flags = 0;
892199bd
JA
655 if (td->odirect)
656 flags |= O_DIRECT;
657
7dd1389e
JA
658 if (td->ddir == DDIR_READ)
659 td->fd = open(td->file_name, flags | O_RDONLY);
660 else
661 td->fd = open(td->file_name, flags | O_WRONLY | O_CREAT | O_TRUNC, 0644);
662
892199bd
JA
663 if (td->fd == -1) {
664 td->error = errno;
665 goto err;
666 }
667
43000118
JA
668 if (td->use_aio && init_aio(td))
669 goto err;
670
892199bd
JA
671 if (init_random_state(td))
672 goto out;
673 if (init_stat_file(td))
674 goto out;
675
676 if (td->ddir == DDIR_READ) {
4240cfa1 677 if (fstat(td->fd, &st) == -1) {
892199bd
JA
678 td->error = errno;
679 goto out;
680 }
681
4240cfa1 682 td->blocks = st.st_size / td->bs;
892199bd
JA
683 if (!td->blocks) {
684 td->error = EINVAL;
685 goto out;
686 }
687 } else
688 td->blocks = 1024 * 1024 * 1024 / td->bs;
689
f737299d 690 if (td->ioprio) {
892199bd
JA
691 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
692 td->error = errno;
693 goto out;
694 }
695 }
696
697 sem_post(&startup_sem);
698 sem_wait(&td->mutex);
43000118
JA
699
700 if (!td->use_aio)
701 do_sync_io(td);
702 else
703 do_async_io(td);
704
892199bd
JA
705 ret = 0;
706
707out:
892199bd
JA
708 shutdown_stat_file(td);
709err:
18e0b78c
JA
710 if (td->fd != -1)
711 close(td->fd);
43000118
JA
712 if (td->use_aio)
713 cleanup_aio(td);
892199bd
JA
714 if (ret)
715 sem_post(&startup_sem);
02bdd9ba
JA
716
717 td->runstate = TD_EXITED;
4240cfa1 718 shmdt(data);
892199bd
JA
719 return NULL;
720}
721
5c24b2c4 722static void free_shm(void)
892199bd
JA
723{
724 shmdt(threads);
725}
726
5c24b2c4 727static void show_thread_status(struct thread_data *td)
892199bd
JA
728{
729 int prio, prio_class;
730 unsigned long bw = 0;
731
213b446c
JA
732 if (!td->io_blocks && !td->error)
733 return;
734
892199bd 735 if (td->runtime)
4240cfa1 736 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
737
738 prio = td->ioprio & 0xff;
739 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
740
4240cfa1 741 printf("thread%d (%s): err=%2d, prio=%1d/%1d maxl=%5lumsec, io=%6luMiB, bw=%6luKiB/sec\n", td->thread_number, td->ddir == DDIR_READ ? " read": "write", td->error, prio_class, prio, td->max_latency, td->io_blocks * td->bs >> 20, bw);
892199bd
JA
742}
743
5c24b2c4 744static int setup_rate(struct thread_data *td)
86184d14 745{
4240cfa1
JA
746 int nr_reads_per_sec;
747
748 if (!td->rate)
749 return 0;
750
751 if (td->rate < td->ratemin) {
752 fprintf(stderr, "min rate larger than nominal rate\n");
753 return -1;
754 }
86184d14 755
4240cfa1 756 nr_reads_per_sec = td->rate * 1024 / td->bs;
86184d14
JA
757 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
758 td->rate_pending_usleep = 0;
4240cfa1 759 return 0;
86184d14
JA
760}
761
5c24b2c4 762static struct thread_data *get_new_job(void)
892199bd 763{
4240cfa1
JA
764 struct thread_data *td;
765
766 if (thread_number >= MAX_JOBS)
767 return NULL;
768
769 td = &threads[thread_number++];
fc24389f 770 memset(td, 0, sizeof(*td));
892199bd 771
86184d14 772 td->thread_number = thread_number;
f737299d
JA
773 td->ddir = DDIR_READ;
774 td->bs = global_bs;
775 td->odirect = 1;
776 td->delay_sleep = 0;
777 td->rate = 0;
4240cfa1 778 td->ratecycle = DEF_RATE_CYCLE;
f737299d
JA
779 td->sequential = sequential;
780 td->ioprio = 0;
43000118
JA
781 td->use_aio = 0;
782 td->aio_depth = 0;
783 td->aio_cur_depth = 0;
f737299d
JA
784 memcpy(&td->cpumask, &def_cpumask, sizeof(td->cpumask));
785
786 return td;
787}
788
4240cfa1
JA
789static void put_job(struct thread_data *td)
790{
791 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
792 thread_number--;
793}
794
5c24b2c4
JA
795static int add_job(struct thread_data *td, const char *filename, int prioclass,
796 int prio)
f737299d
JA
797{
798 strcpy(td->file_name, filename);
892199bd 799 td->stat_fd = -1;
4240cfa1 800 sem_init(&td->mutex, 1, 0);
892199bd 801 td->min_latency = 10000000;
f737299d
JA
802 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
803
43000118
JA
804 if (td->use_aio && !td->aio_depth)
805 td->aio_depth = 1;
806
4240cfa1
JA
807 if (setup_rate(td))
808 return -1;
f737299d 809
43000118 810 printf("Client%d: file=%s, rw=%d, prio=%d, seq=%d, odir=%d, bs=%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, filename, td->ddir, td->ioprio, td->sequential, td->odirect, td->bs, td->rate, td->use_aio, td->aio_depth);
4240cfa1 811 return 0;
892199bd
JA
812}
813
18e0b78c
JA
/*
 * Turn the bitmask 'cpu' into a cpu set: bit N set means cpu N is allowed.
 */
static void fill_cpu_mask_ptr(cpu_set_t *cpumask, int cpu)
{
	unsigned int i;

	CPU_ZERO(cpumask);

	for (i = 0; i < sizeof(int) * 8; i++) {
		/* unsigned so that testing bit 31 is well defined */
		if ((1U << i) & (unsigned int) cpu)
			CPU_SET(i, cpumask);
	}
}

/*
 * fill_cpu_mask(mask, cpu) used to be a function taking the cpu_set_t by
 * value, so it filled in a copy and the caller's mask never changed. The
 * macro keeps the call sites as they are but passes the mask by address.
 */
#define fill_cpu_mask(mask, cpu)	fill_cpu_mask_ptr(&(mask), (cpu))
825
/*
 * Copy one option value from 'input' to 'output', stopping at the first
 * ',' or '}' or at the end of the string. 'output' is always terminated.
 * No bound is applied, 'output' must be large enough for the value.
 */
static void fill_option(const char *input, char *output)
{
	const char *src = input;
	char *dst = output;

	for (; *src != '\0'; src++) {
		if (*src == ',' || *src == '}')
			break;
		*dst++ = *src;
	}

	*dst = '\0';
}
838
839/*
840 * job key words:
841 *
842 * file=
843 * bs=
844 * rw=
845 * direct=
846 */
5c24b2c4 847static void parse_jobs_cmd(int argc, char *argv[], int index)
892199bd 848{
f737299d
JA
849 struct thread_data *td;
850 unsigned int prio, prioclass, cpu;
892199bd
JA
851 char *string, *filename, *p, *c;
852 int i;
853
854 string = malloc(256);
855 filename = malloc(256);
856
857 for (i = index; i < argc; i++) {
858 p = argv[i];
859
860 c = strpbrk(p, "{");
861 if (!c)
862 break;
863
864 filename[0] = 0;
4240cfa1 865
f737299d 866 td = get_new_job();
4240cfa1
JA
867 if (!td)
868 break;
f737299d 869
892199bd 870 prioclass = 2;
f737299d 871 prio = 4;
892199bd
JA
872
873 c = strstr(p, "rw=");
874 if (c) {
875 c += 3;
876 if (*c == '0')
f737299d 877 td->ddir = DDIR_READ;
892199bd 878 else
f737299d 879 td->ddir = DDIR_WRITE;
892199bd
JA
880 }
881
882 c = strstr(p, "prio=");
883 if (c) {
884 c += 5;
885 prio = *c - '0';
886 }
887
888 c = strstr(p, "prioclass=");
889 if (c) {
890 c += 10;
891 prioclass = *c - '0';
892 }
893
894 c = strstr(p, "file=");
895 if (c) {
896 c += 5;
897 fill_option(c, filename);
898 }
899
900 c = strstr(p, "bs=");
901 if (c) {
902 c += 3;
903 fill_option(c, string);
f737299d
JA
904 td->bs = strtoul(string, NULL, 10);
905 td->bs <<= 10;
892199bd
JA
906 }
907
908 c = strstr(p, "direct=");
909 if (c) {
910 c += 7;
911 if (*c != '0')
f737299d 912 td->odirect = 1;
892199bd 913 else
f737299d 914 td->odirect = 0;
892199bd
JA
915 }
916
917 c = strstr(p, "delay=");
918 if (c) {
919 c += 6;
920 fill_option(c, string);
f737299d 921 td->delay_sleep = strtoul(string, NULL, 10);
892199bd
JA
922 }
923
86184d14
JA
924 c = strstr(p, "rate=");
925 if (c) {
926 c += 5;
927 fill_option(c, string);
f737299d 928 td->rate = strtoul(string, NULL, 10);
86184d14
JA
929 }
930
4240cfa1
JA
931 c = strstr(p, "ratemin=");
932 if (c) {
933 c += 8;
934 fill_option(c, string);
935 td->ratemin = strtoul(string, NULL, 10);
936 }
937
938 c = strstr(p, "ratecycle=");
939 if (c) {
940 c += 10;
941 fill_option(c, string);
942 td->ratecycle = strtoul(string, NULL, 10);
943 }
944
18e0b78c
JA
945 c = strstr(p, "cpumask=");
946 if (c) {
947 c += 8;
948 fill_option(c, string);
949 cpu = strtoul(string, NULL, 10);
f737299d 950 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
951 }
952
4240cfa1
JA
953 c = strstr(p, "fsync=");
954 if (c) {
955 c += 6;
956 fill_option(c, string);
957 td->fsync_blocks = strtoul(string, NULL, 10);
958 }
18e0b78c 959
fc24389f
JA
960 c = strstr(p, "startdelay=");
961 if (c) {
962 c += 11;
963 fill_option(c, string);
964 td->start_delay = strtoul(string, NULL, 10);
965 }
966
43000118
JA
967 c = strstr(p, "aio_depth=");
968 if (c) {
969 c += 10;
970 fill_option(c, string);
971 td->aio_depth = strtoul(string, NULL, 10);
972 }
973
974 c = strstr(p, "aio");
975 if (c)
976 td->use_aio = 1;
977
892199bd
JA
978 c = strstr(p, "random");
979 if (c)
f737299d 980 td->sequential = 0;
892199bd
JA
981 c = strstr(p, "sequential");
982 if (c)
f737299d 983 td->sequential = 1;
892199bd 984
4240cfa1
JA
985 if (add_job(td, filename, prioclass, prio))
986 put_job(td);
892199bd
JA
987 }
988
7dd1389e
JA
989 free(string);
990 free(filename);
892199bd
JA
991}
992
/*
 * Try to read "<name>=<number>" or "<name> = <number>" from the start of
 * line 'p' into '*val'.
 *
 * Returns 0 when a value was read, 1 when the line does not match.
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char fmt[128];
	int matched;

	/* first the form without blanks around the '=' */
	sprintf(fmt, "%s=%%d", name);
	matched = sscanf(p, fmt, val) == 1;

	if (!matched) {
		sprintf(fmt, "%s = %%d", name);
		matched = sscanf(p, fmt, val) == 1;
	}

	return !matched;
}
1007
/*
 * Returns 1 if 'line' holds nothing but white space and control
 * characters (or is empty), 0 otherwise.
 *
 * Characters are examined as unsigned char: handing a negative char value
 * to the <ctype.h> functions is undefined.
 */
static int is_empty(char *line)
{
	const unsigned char *c;

	for (c = (const unsigned char *) line; *c; c++)
		if (!isspace(*c) && !iscntrl(*c))
			return 0;

	return 1;
}
1018
5c24b2c4 1019static int parse_jobs_ini(char *file)
7dd1389e 1020{
4240cfa1 1021 unsigned int prioclass, prio, cpu;
f737299d 1022 struct thread_data *td;
7dd1389e
JA
1023 char *string, *name;
1024 fpos_t off;
1025 FILE *f;
1026 char *p;
1027
1028 f = fopen(file, "r");
1029 if (!f) {
1030 perror("fopen");
4240cfa1 1031 return 1;
7dd1389e
JA
1032 }
1033
1034 string = malloc(4096);
1035 name = malloc(256);
1036
7dd1389e
JA
1037 while ((p = fgets(string, 4096, f)) != NULL) {
1038 if (sscanf(p, "[%s]", name) != 1)
1039 continue;
1040
1041 name[strlen(name) - 1] = '\0';
1042
f737299d 1043 td = get_new_job();
4240cfa1
JA
1044 if (!td)
1045 break;
f737299d 1046
7dd1389e 1047 prioclass = 2;
f737299d 1048 prio = 4;
7dd1389e
JA
1049
1050 fgetpos(f, &off);
1051 while ((p = fgets(string, 4096, f)) != NULL) {
1052 if (is_empty(p))
e6402082
JA
1053 continue;
1054 if (strstr(p, "["))
7dd1389e 1055 break;
f737299d
JA
1056 if (!check_int(p, "bs", &td->bs)) {
1057 td->bs <<= 10;
7dd1389e
JA
1058 fgetpos(f, &off);
1059 continue;
1060 }
f737299d 1061 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
1062 fgetpos(f, &off);
1063 continue;
1064 }
1065 if (!check_int(p, "prio", &prio)) {
1066 fgetpos(f, &off);
1067 continue;
1068 }
1069 if (!check_int(p, "prioclass", &prioclass)) {
1070 fgetpos(f, &off);
1071 continue;
1072 }
f737299d 1073 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
1074 fgetpos(f, &off);
1075 continue;
1076 }
f737299d 1077 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
1078 fgetpos(f, &off);
1079 continue;
1080 }
4240cfa1
JA
1081 if (!check_int(p, "ratemin", &td->ratemin)) {
1082 fgetpos(f, &off);
1083 continue;
1084 }
1085 if (!check_int(p, "ratecycle", &td->ratecycle)) {
1086 fgetpos(f, &off);
1087 continue;
1088 }
f737299d 1089 if (!check_int(p, "delay", &td->delay_sleep)) {
7dd1389e
JA
1090 fgetpos(f, &off);
1091 continue;
1092 }
18e0b78c 1093 if (!check_int(p, "cpumask", &cpu)) {
f737299d 1094 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
1095 fgetpos(f, &off);
1096 continue;
1097 }
4240cfa1
JA
1098 if (!check_int(p, "fsync", &td->fsync_blocks)) {
1099 fgetpos(f, &off);
1100 continue;
1101 }
fc24389f
JA
1102 if (!check_int(p, "startdelay", &td->start_delay)) {
1103 fgetpos(f, &off);
1104 continue;
1105 }
43000118
JA
1106 if (!check_int(p, "aio_depth", &td->aio_depth)) {
1107 fgetpos(f, &off);
1108 continue;
1109 }
1110 if (!strncmp(p, "sequential", 10)) {
f737299d 1111 td->sequential = 1;
7dd1389e
JA
1112 fgetpos(f, &off);
1113 continue;
1114 }
43000118 1115 if (!strncmp(p, "random", 6)) {
f737299d 1116 td->sequential = 0;
7dd1389e
JA
1117 fgetpos(f, &off);
1118 continue;
1119 }
43000118
JA
1120 if (!strncmp(p, "aio", 3)) {
1121 td->use_aio = 1;
1122 fgetpos(f, &off);
1123 continue;
1124 }
1125
e6402082 1126 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
1127 }
1128 fsetpos(f, &off);
1129
4240cfa1
JA
1130 if (add_job(td, name, prioclass, prio))
1131 put_job(td);
7dd1389e
JA
1132 }
1133
1134 free(string);
1135 free(name);
fc7d63df 1136 fclose(f);
4240cfa1 1137 return 0;
7dd1389e
JA
1138}
1139
5c24b2c4 1140static int parse_options(int argc, char *argv[])
892199bd 1141{
01c4d8de 1142 int i;
892199bd
JA
1143
1144 for (i = 1; i < argc; i++) {
1145 char *parm = argv[i];
1146
1147 if (parm[0] != '-')
1148 break;
1149
1150 parm++;
1151 switch (*parm) {
1152 case 's':
1153 parm++;
1154 sequential = !!atoi(parm);
1155 break;
1156 case 'b':
1157 parm++;
1158 global_bs = atoi(parm);
1159 global_bs <<= 10;
4240cfa1
JA
1160 if (!global_bs) {
1161 printf("bad block size\n");
1162 global_bs = DEF_BS;
1163 }
892199bd
JA
1164 break;
1165 case 't':
1166 parm++;
1167 timeout = atoi(parm);
1168 break;
1169 case 'w':
1170 parm++;
1171 write_stat = !!atoi(parm);
1172 break;
1173 case 'r':
1174 parm++;
1175 repeatable = !!atoi(parm);
1176 break;
02bdd9ba
JA
1177 case 'R':
1178 parm++;
1179 rate_quit = !!atoi(parm);
1180 break;
892199bd
JA
1181 case 'o':
1182 parm++;
1183 odirect = !!atoi(parm);
1184 break;
7dd1389e
JA
1185 case 'f':
1186 if (i + 1 >= argc) {
1187 printf("-f needs file as arg\n");
1188 break;
1189 }
1190 ini_file = strdup(argv[i+1]);
1191 break;
892199bd 1192 default:
7dd1389e 1193 printf("bad option %s\n", argv[i]);
892199bd
JA
1194 break;
1195 }
1196 }
1197
892199bd
JA
1198 return i;
1199}
1200
213b446c 1201static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 1202{
213b446c 1203 int i;
02bdd9ba
JA
1204
1205 for (i = 0; i < thread_number; i++) {
1206 struct thread_data *td = &threads[i];
1207
213b446c
JA
1208 if (td->runstate != TD_EXITED)
1209 continue;
02bdd9ba 1210
213b446c
JA
1211 td->runstate = TD_REAPED;
1212 waitpid(td->pid, NULL, 0);
1213 (*nr_running)--;
1214 (*m_rate) -= td->ratemin;
1215 (*t_rate) -= td->rate;
e6402082
JA
1216
1217 if (td->terminate)
1218 continue;
1219
213b446c
JA
1220 printf("Threads now running: %d", *nr_running);
1221 if (*m_rate || *t_rate)
1222 printf(", rate %d/%dKiB/sec", *t_rate, *m_rate);
1223 printf("\n");
1224 }
02bdd9ba
JA
1225}
1226
fc24389f
JA
1227static void run_threads(char *argv[])
1228{
1229 struct timeval genesis, now;
1230 struct thread_data *td;
1231 unsigned long spent;
213b446c 1232 int i, todo, nr_running, m_rate, t_rate;
fc24389f
JA
1233
1234 gettimeofday(&genesis, NULL);
1235
1236 printf("Starting %d threads\n", thread_number);
1237 fflush(stdout);
1238
1239 if (timeout) {
1240 signal(SIGALRM, sig_handler);
1241 alarm(timeout);
1242 }
1243
1244 todo = thread_number;
02bdd9ba 1245 nr_running = 0;
213b446c 1246 m_rate = t_rate = 0;
fc24389f 1247
213b446c 1248 while (todo) {
fc24389f
JA
1249 for (i = 0; i < thread_number; i++) {
1250 td = &threads[i];
1251
02bdd9ba 1252 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
1253 continue;
1254
213b446c
JA
1255 /*
1256 * never got a chance to start, killed by other
1257 * thread for some reason
1258 */
1259 if (td->terminate) {
1260 todo--;
1261 continue;
1262 }
1263
fc24389f
JA
1264 if (td->start_delay) {
1265 gettimeofday(&now, NULL);
1266 spent = mtime_since(&genesis, &now);
1267
1268 if (td->start_delay * 1000 > spent)
1269 continue;
1270 }
1271
02bdd9ba 1272 td->runstate = TD_CREATED;
fc24389f
JA
1273 sem_init(&startup_sem, 1, 1);
1274 todo--;
1275
1276 if (fork())
1277 sem_wait(&startup_sem);
1278 else {
1279 thread_main(shm_id, i, argv);
1280 exit(0);
1281 }
1282 }
1283
1284 for (i = 0; i < thread_number; i++) {
1285 struct thread_data *td = &threads[i];
1286
02bdd9ba
JA
1287 if (td->runstate == TD_CREATED) {
1288 td->runstate = TD_STARTED;
1289 nr_running++;
213b446c
JA
1290 m_rate += td->ratemin;
1291 t_rate += td->rate;
fc24389f 1292 sem_post(&td->mutex);
213b446c
JA
1293
1294 printf("Threads now running: %d", nr_running);
1295 if (m_rate || t_rate)
1296 printf(", rate %d/%dKiB/sec", t_rate, m_rate);
1297 printf("\n");
fc24389f
JA
1298 }
1299 }
1300
213b446c 1301 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 1302
fc24389f
JA
1303 if (todo)
1304 usleep(100000);
1305 }
02bdd9ba
JA
1306
1307 while (nr_running) {
213b446c 1308 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
1309 usleep(10000);
1310 }
fc24389f
JA
1311}
1312
892199bd
JA
1313int main(int argc, char *argv[])
1314{
1315 static unsigned long max_run[2], min_run[2], total_blocks[2];
1316 static unsigned long max_bw[2], min_bw[2], maxl[2], minl[2];
1317 static unsigned long read_mb, write_mb, read_agg, write_agg;
4240cfa1 1318 int i;
18e0b78c 1319
4240cfa1 1320 shm_id = shmget(0, MAX_JOBS * sizeof(struct thread_data), IPC_CREAT | 0600);
892199bd
JA
1321 if (shm_id == -1) {
1322 perror("shmget");
1323 return 1;
1324 }
1325
1326 threads = shmat(shm_id, NULL, 0);
86184d14
JA
1327 if (threads == (void *) -1 ) {
1328 perror("shmat");
1329 return 1;
1330 }
892199bd
JA
1331
1332 atexit(free_shm);
1333
4240cfa1
JA
1334 if (sched_getaffinity(getpid(), sizeof(def_cpumask), &def_cpumask) == -1) {
1335 perror("sched_getaffinity");
1336 return 1;
1337 }
1338
892199bd 1339 i = parse_options(argc, argv);
7dd1389e 1340
4240cfa1
JA
1341 if (ini_file) {
1342 if (parse_jobs_ini(ini_file))
1343 return 1;
1344 } else
1345 parse_jobs_cmd(argc, argv, i);
7dd1389e 1346
4240cfa1
JA
1347 if (!thread_number) {
1348 printf("Nothing to do\n");
1349 return 1;
1350 }
7dd1389e
JA
1351
1352 printf("%s: %s, bs=%uKiB, timeo=%u, write_stat=%u, odirect=%d\n", argv[0], sequential ? "sequential" : "random", global_bs >> 10, timeout, write_stat, odirect);
1353
fc24389f 1354 run_threads(argv);
892199bd 1355
892199bd
JA
1356 min_bw[0] = min_run[0] = ~0UL;
1357 min_bw[1] = min_run[1] = ~0UL;
1358 minl[0] = minl[1] = ~0UL;
1359 for (i = 0; i < thread_number; i++) {
1360 struct thread_data *td = &threads[i];
1361 unsigned long bw = 0;
1362
1363 if (td->error)
7dd1389e 1364 goto show_stat;
892199bd
JA
1365
1366 if (td->runtime < min_run[td->ddir])
1367 min_run[td->ddir] = td->runtime;
1368 if (td->runtime > max_run[td->ddir])
1369 max_run[td->ddir] = td->runtime;
1370
1371 if (td->runtime)
4240cfa1 1372 bw = (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1373 if (bw < min_bw[td->ddir])
1374 min_bw[td->ddir] = bw;
1375 if (bw > max_bw[td->ddir])
1376 max_bw[td->ddir] = bw;
1377 if (td->max_latency < minl[td->ddir])
1378 minl[td->ddir] = td->max_latency;
1379 if (td->max_latency > maxl[td->ddir])
1380 maxl[td->ddir] = td->max_latency;
1381
4240cfa1 1382 total_blocks[td->ddir] += td->io_blocks;
892199bd
JA
1383
1384 if (td->ddir == DDIR_READ) {
4240cfa1 1385 read_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1386 if (td->runtime)
4240cfa1 1387 read_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1388 }
1389 if (td->ddir == DDIR_WRITE) {
4240cfa1 1390 write_mb += (td->bs * td->io_blocks) >> 20;
892199bd 1391 if (td->runtime)
4240cfa1 1392 write_agg += (td->io_blocks * td->bs) / td->runtime;
892199bd
JA
1393 }
1394
7dd1389e 1395show_stat:
892199bd
JA
1396 show_thread_status(td);
1397 }
1398
1399 printf("Run status:\n");
1400 if (max_run[DDIR_READ])
1401 printf(" READ: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", read_mb, read_agg, minl[0], maxl[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
1402 if (max_run[DDIR_WRITE])
1403 printf(" WRITE: io=%luMiB, aggrb=%lu, minl=%lu, maxl=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", write_mb, write_agg, minl[1], maxl[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 1404
892199bd
JA
1405 return 0;
1406}