[PATCH] fio: update tiobench example to use 'numjobs'
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
e128065d 32#include <math.h>
02983297 33#include <limits.h>
49d2caab 34#include <assert.h>
892199bd
JA
35#include <sys/time.h>
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/wait.h>
39#include <semaphore.h>
40#include <sys/ipc.h>
41#include <sys/shm.h>
c94deb1c 42#include <sys/ioctl.h>
892199bd 43#include <asm/unistd.h>
946d8870 44#include <asm/types.h>
49d2caab 45#include <asm/bitops.h>
892199bd 46
fd11d7af 47#include "arch.h"
2c83567e 48#include "list.h"
e8457004 49#include "md5.h"
2c83567e 50
c94deb1c
JA
51#ifndef BLKGETSIZE64
52#define BLKGETSIZE64 _IOR(0x12,114,size_t)
53#endif
54
4240cfa1
JA
55#define MAX_JOBS (1024)
56
892199bd
JA
/*
 * Issue the ioprio_set system call through syscall(); the arguments are
 * passed through unchanged and the syscall() result is returned.
 */
static int ioprio_set(int which, int who, int ioprio)
{
	long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return rc;
}
61
b95799ca
JA
62/*
63 * we want fadvise64 really, but it's so tangled... later
64 */
65static int fadvise(int fd, loff_t offset, size_t len, int advice)
66{
67#if 0
68 return syscall(__NR_fadvise64, fd, offset, offset >> 32, len, advice);
69#else
70 return posix_fadvise(fd, (off_t) offset, len, advice);
71#endif
72}
73
892199bd
JA
/* values for the "which" argument of ioprio_set() */
enum {
	IOPRIO_WHO_PROCESS	= 1,
	IOPRIO_WHO_PGRP		= 2,
	IOPRIO_WHO_USER		= 3,
};
79
80#define IOPRIO_CLASS_SHIFT 13
81
892199bd
JA
/* low-bit mask used by ALIGN(): rounds up to a multiple of MASK + 1 */
#define MASK			(4095)

/*
 * DEF_* constants. DEF_RAND_REPEAT initialises 'repeatable' and
 * DEF_RANDSEED is the fixed seed used when 'repeatable' is set; the rest
 * are presumably the defaults for the matching job options (the code that
 * applies them is not in this part of the file).
 */
#define DEF_BS			(4096)
#define DEF_TIMEOUT		(0)
#define DEF_RATE_CYCLE		(1000)
#define DEF_ODIRECT		(1)
#define DEF_SEQUENTIAL		(1)
#define DEF_RAND_REPEAT		(1)
#define DEF_OVERWRITE		(0)
#define DEF_CREATE		(1)
#define DEF_INVALIDATE		(1)
#define DEF_SYNCIO		(0)
#define DEF_RANDSEED		(0xb1899bedUL)
#define DEF_BWAVGTIME		(500)
#define DEF_CREATE_SER		(1)
#define DEF_CREATE_FSYNC	(1)
#define DEF_LOOPS		(1)
#define DEF_VERIFY		(0)
#define DEF_STONEWALL		(0)
#define DEF_NUMJOBS		(1)

/*
 * Round 'buf' up to the next (MASK + 1)-byte boundary. The whole expansion
 * is parenthesised so the result can be used directly in a larger
 * expression, e.g. ALIGN(p)[0].
 */
#define ALIGN(buf)	((char *) (((unsigned long) (buf) + MASK) & ~(MASK)))
892199bd 104
4240cfa1 105static int repeatable = DEF_RAND_REPEAT;
02bdd9ba 106static int rate_quit = 1;
a0a9b35b
JA
107static int write_lat_log;
108static int write_bw_log;
98dd52d6 109static int exitall_on_terminate;
892199bd 110
892199bd 111static int thread_number;
7dd1389e 112static char *ini_file;
892199bd 113
8867c0a8
JA
114static int max_jobs = MAX_JOBS;
115
8dbff0b1
JA
116static char run_str[MAX_JOBS + 1];
117
892199bd
JA
118static int shm_id;
119
4240cfa1
JA
/* data direction, compared against thread_data.ddir by td_read/td_write */
enum {
	DDIR_READ	= 0,
	DDIR_WRITE	= 1,
};
892199bd 124
02bdd9ba
JA
/*
 * thread life cycle (values stored in thread_data.runstate)
 */
enum {
	TD_NOT_CREATED	= 0,
	TD_CREATED	= 1,
	TD_RUNNING	= 2,
	TD_VERIFYING	= 3,
	TD_EXITED	= 4,
	TD_REAPED	= 5,
};
136
99c6704f
JA
/* buffer memory types (see thread_data.mem_type) */
enum {
	MEM_MALLOC	= 0,
	MEM_SHM		= 1,
};
141
2c83567e
JA
142/*
143 * The io unit
144 */
145struct io_u {
146 struct iocb iocb;
57d753e3 147 struct timeval start_time;
2c83567e
JA
148 struct timeval issue_time;
149
2c83567e
JA
150 char *buf;
151 unsigned int buflen;
4ac89145 152 unsigned long long offset;
2c83567e
JA
153
154 struct list_head list;
155};
156
57d753e3
JA
/* running statistics, updated by add_stat_sample() */
struct io_stat {
	unsigned long val;	/* sum of all samples */
	unsigned long val_sq;	/* sum of the squares of all samples */
	unsigned long max_val;	/* largest sample seen */
	unsigned long min_val;	/* smallest sample seen */
	unsigned long samples;	/* number of samples */
};
164
a0a9b35b
JA
/* one logged value, as stored by add_log_sample() */
struct io_sample {
	unsigned long time;	/* msec since the thread's start time */
	unsigned long val;	/* the sampled value */
};
169
/* growable array of samples; add_log_sample() doubles it when full */
struct io_log {
	unsigned long nr_samples;	/* entries in use */
	unsigned long max_samples;	/* entries allocated */
	struct io_sample *log;		/* the sample array */
};
175
645785e5
JA
176struct io_piece {
177 struct list_head list;
178 unsigned long long offset;
179 unsigned int len;
180};
181
e8457004
JA
182#define FIO_HDR_MAGIC 0xf00baaef
183
184struct verify_header {
185 unsigned int fio_magic;
186 unsigned int len;
187 char md5_digest[MD5_HASH_WORDS * 4];
188};
189
/* data direction tests on a thread_data pointer */
#define td_read(td)		((td)->ddir == DDIR_READ)
#define td_write(td)		((td)->ddir == DDIR_WRITE)
/* fsync only makes sense for writes that are not O_DIRECT */
#define should_fsync(td)	(td_write(td) && !(td)->odirect)

/*
 * Random-map helpers: the map holds one bit per min_bs block, counted from
 * the job's file_offset, packed into unsigned longs.
 */
#define BLOCKS_PER_MAP		(8 * sizeof(long))
#define TO_MAP_BLOCK(td, b)	((b) - ((td)->file_offset / (td)->min_bs))
#define RAND_MAP_IDX(td, b)	(TO_MAP_BLOCK(td, b) / BLOCKS_PER_MAP)
#define RAND_MAP_BIT(td, b)	(TO_MAP_BLOCK(td, b) & (BLOCKS_PER_MAP - 1))
49d2caab 198
892199bd
JA
199struct thread_data {
200 char file_name[256];
fe74f555 201 char directory[256];
892199bd
JA
202 int thread_number;
203 int error;
204 int fd;
892199bd 205 pid_t pid;
6b71c826 206 char *orig_buffer;
4240cfa1 207 volatile int terminate;
02bdd9ba 208 volatile int runstate;
40ef7f64 209 volatile int old_runstate;
f737299d
JA
210 unsigned int ddir;
211 unsigned int ioprio;
212 unsigned int sequential;
213 unsigned int bs;
8c033f93
JA
214 unsigned int min_bs;
215 unsigned int max_bs;
f737299d 216 unsigned int odirect;
e97712ed 217 unsigned int thinktime;
4240cfa1 218 unsigned int fsync_blocks;
fc24389f 219 unsigned int start_delay;
47d45203 220 unsigned int timeout;
43000118 221 unsigned int use_aio;
02983297
JA
222 unsigned int create_file;
223 unsigned int overwrite;
b95799ca 224 unsigned int invalidate_cache;
1d035750 225 unsigned int bw_avg_time;
fc097bfe
JA
226 unsigned int create_serialize;
227 unsigned int create_fsync;
b6794fbf 228 unsigned int loops;
02983297
JA
229 unsigned long long file_size;
230 unsigned long long file_offset;
74b4b5fb 231 unsigned int sync_io;
99c6704f 232 unsigned int mem_type;
cfc702bd 233 unsigned int verify;
2a81240d 234 unsigned int stonewall;
fe74f555 235 unsigned int numjobs;
18e0b78c 236 cpu_set_t cpumask;
86184d14 237
7889f07b 238 struct drand48_data bsrange_state;
e8457004 239 struct drand48_data verify_state;
7889f07b 240
99c6704f
JA
241 int shm_id;
242
63a09e51
JA
243 off_t cur_off;
244
254605cd 245 io_context_t aio_ctx;
43000118 246 unsigned int aio_depth;
43000118 247 struct io_event *aio_events;
2c83567e
JA
248
249 unsigned int cur_depth;
250 struct list_head io_u_freelist;
251 struct list_head io_u_busylist;
43000118 252
7dd1389e 253 unsigned int rate;
4240cfa1
JA
254 unsigned int ratemin;
255 unsigned int ratecycle;
256 unsigned long rate_usec_cycle;
257 long rate_pending_usleep;
49d2caab 258 unsigned long rate_bytes;
4240cfa1 259 struct timeval lastrate;
86184d14 260
892199bd 261 unsigned long runtime; /* sec */
49d2caab 262 unsigned long long io_size;
e2ba35d9 263
4240cfa1 264 unsigned long io_blocks;
49d2caab
JA
265 unsigned long io_bytes;
266 unsigned long this_io_bytes;
267 unsigned long last_bytes;
892199bd 268 sem_t mutex;
49d2caab 269
892199bd 270 struct drand48_data random_state;
49d2caab
JA
271 unsigned long *file_map;
272 unsigned int num_maps;
892199bd
JA
273
274 /*
e128065d 275 * bandwidth and latency stats
892199bd 276 */
57d753e3
JA
277 struct io_stat clat_stat; /* completion latency */
278 struct io_stat slat_stat; /* submission latency */
279
280 struct io_stat bw_stat; /* bandwidth stats */
49d2caab 281 unsigned long stat_io_bytes;
fd1ae4c9 282 struct timeval stat_sample_time;
4240cfa1 283
a0a9b35b
JA
284 struct io_log *lat_log;
285 struct io_log *bw_log;
286
4240cfa1 287 struct timeval start;
92b229ed
JA
288 struct rusage ru_start;
289 struct rusage ru_end;
645785e5
JA
290
291 struct list_head io_hist_list;
892199bd
JA
292};
293
294static struct thread_data *threads;
47d45203 295static struct thread_data def_thread;
892199bd
JA
296
297static sem_t startup_sem;
298
5c24b2c4 299static void sig_handler(int sig)
892199bd
JA
300{
301 int i;
302
213b446c
JA
303 for (i = 0; i < thread_number; i++) {
304 struct thread_data *td = &threads[i];
305
306 td->terminate = 1;
307 td->start_delay = 0;
308 }
02bdd9ba
JA
309}
310
e8457004 311static int init_random_state(struct thread_data *td)
946d8870 312{
e8457004 313 unsigned long seed;
49d2caab 314 int fd, num_maps, blocks;
946d8870 315
e8457004
JA
316 fd = open("/dev/random", O_RDONLY);
317 if (fd == -1) {
318 td->error = errno;
319 return 1;
946d8870
JA
320 }
321
e8457004
JA
322 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
323 td->error = EIO;
324 close(fd);
325 return 1;
946d8870
JA
326 }
327
e8457004 328 close(fd);
7889f07b
JA
329
330 srand48_r(seed, &td->bsrange_state);
e8457004 331 srand48_r(seed, &td->verify_state);
892199bd
JA
332
333 if (td->sequential)
334 return 0;
335
e8457004
JA
336 if (repeatable)
337 seed = DEF_RANDSEED;
892199bd 338
49d2caab
JA
339 blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
340 num_maps = blocks / BLOCKS_PER_MAP;
341 td->file_map = malloc(num_maps * sizeof(long));
342 td->num_maps = num_maps;
343 memset(td->file_map, 0, num_maps * sizeof(long));
344
892199bd
JA
345 srand48_r(seed, &td->random_state);
346 return 0;
347}
348
/*
 * Microseconds elapsed from *s to *e.
 *
 * Computed in integer arithmetic. If *e is earlier than *s (gettimeofday()
 * is not monotonic), 0 is returned rather than a negative value converted
 * to unsigned.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long delta;

	delta = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	delta += (long long) e->tv_usec - (long long) s->tv_usec;

	if (delta < 0)
		return 0;

	return delta;
}
364
fd11d7af
JA
/*
 * Microseconds elapsed between *s and the current time of day.
 */
static unsigned long utime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return utime_since(s, &now);
}
372
/*
 * Milliseconds (truncated) elapsed from *s to *e.
 *
 * Computed in integer arithmetic. If *e is earlier than *s (gettimeofday()
 * is not monotonic), 0 is returned rather than a negative value converted
 * to unsigned.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long delta_usec;

	delta_usec = (long long) (e->tv_sec - s->tv_sec) * 1000000LL;
	delta_usec += (long long) e->tv_usec - (long long) s->tv_usec;

	if (delta_usec < 0)
		return 0;

	return delta_usec / 1000;
}
389
be33abe4
JA
/*
 * Milliseconds elapsed between *s and the current time of day.
 */
static unsigned long mtime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return mtime_since(s, &now);
}
397
98168d55
JA
/*
 * Convert the timeval *s into a millisecond count.
 */
static inline unsigned long msec_now(struct timeval *s)
{
	unsigned long ms = s->tv_sec * 1000;

	ms += s->tv_usec / 1000;
	return ms;
}
402
49d2caab
JA
403static int random_map_free(struct thread_data *td, unsigned long long block)
404{
75b2ab2c
JA
405 unsigned int idx = RAND_MAP_IDX(td, block);
406 unsigned int bit = RAND_MAP_BIT(td, block);
49d2caab
JA
407
408 return (td->file_map[idx] & (1UL << bit)) == 0;
409}
410
411static int get_next_free_block(struct thread_data *td, unsigned long long *b)
892199bd 412{
49d2caab
JA
413 int i;
414
415 *b = 0;
416 i = 0;
417 while ((*b) * td->min_bs < td->io_size) {
418 if (td->file_map[i] != -1UL) {
419 *b += ffz(td->file_map[i]);
420 return 0;
421 }
422
423 *b += BLOCKS_PER_MAP;
424 i++;
425 }
426
427 return 1;
428}
429
430static void mark_random_map(struct thread_data *td, struct io_u *io_u)
431{
432 unsigned long block = io_u->offset / td->min_bs;
433 unsigned int blocks = 0;
434
435 while (blocks < (io_u->buflen / td->min_bs)) {
436 int idx, bit;
437
438 if (!random_map_free(td, block))
439 break;
440
75b2ab2c
JA
441 idx = RAND_MAP_IDX(td, block);
442 bit = RAND_MAP_BIT(td, block);
49d2caab
JA
443
444 assert(idx < td->num_maps);
445
446 td->file_map[idx] |= (1UL << bit);
447 block++;
448 blocks++;
449 }
450
451 if ((blocks * td->min_bs) < io_u->buflen)
452 io_u->buflen = blocks * td->min_bs;
453}
454
455static int get_next_offset(struct thread_data *td, unsigned long long *offset)
456{
75b2ab2c 457 unsigned long long b, rb;
d32d9284 458 long r;
892199bd
JA
459
460 if (!td->sequential) {
49d2caab
JA
461 unsigned long max_blocks = td->io_size / td->min_bs;
462 int loops = 50;
463
464 do {
465 lrand48_r(&td->random_state, &r);
466 b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
75b2ab2c 467 rb = b + (td->file_offset / td->min_bs);
49d2caab 468 loops--;
75b2ab2c 469 } while (!random_map_free(td, rb) && loops);
49d2caab
JA
470
471 if (!loops) {
472 if (get_next_free_block(td, &b))
473 return 1;
474 }
7889f07b 475 } else
49d2caab 476 b = td->last_bytes / td->min_bs;
7889f07b 477
49d2caab 478 *offset = (b * td->min_bs) + td->file_offset;
75b2ab2c
JA
479 if (*offset > td->file_size)
480 return 1;
481
49d2caab 482 return 0;
7889f07b
JA
483}
484
485static unsigned int get_next_buflen(struct thread_data *td)
486{
487 unsigned int buflen;
d32d9284 488 long r;
7889f07b
JA
489
490 if (td->min_bs == td->max_bs)
491 buflen = td->min_bs;
492 else {
d32d9284 493 lrand48_r(&td->bsrange_state, &r);
7889f07b
JA
494 buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
495 buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
892199bd
JA
496 }
497
49d2caab
JA
498 if (buflen > td->io_size - td->this_io_bytes)
499 buflen = td->io_size - td->this_io_bytes;
7889f07b 500
7889f07b 501 return buflen;
892199bd
JA
502}
503
57d753e3
JA
504static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
505 unsigned long val)
892199bd 506{
57d753e3
JA
507 if (val > is->max_val)
508 is->max_val = val;
509 if (val < is->min_val)
510 is->min_val = val;
511
512 is->val += val;
513 is->val_sq += val * val;
514 is->samples++;
515}
fd1ae4c9 516
a0a9b35b
JA
517static void add_log_sample(struct thread_data *td, struct io_log *log,
518 unsigned long val)
519{
520 if (log->nr_samples == log->max_samples) {
521 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
522
523 log->log = realloc(log->log, new_size);
524 log->max_samples <<= 1;
525 }
526
527 log->log[log->nr_samples].val = val;
528 log->log[log->nr_samples].time = mtime_since_now(&td->start);
529 log->nr_samples++;
530}
531
57d753e3
JA
532static void add_clat_sample(struct thread_data *td, unsigned long msec)
533{
534 add_stat_sample(td, &td->clat_stat, msec);
a0a9b35b
JA
535
536 if (td->lat_log)
537 add_log_sample(td, td->lat_log, msec);
57d753e3 538}
fd1ae4c9 539
57d753e3
JA
540static void add_slat_sample(struct thread_data *td, unsigned long msec)
541{
542 add_stat_sample(td, &td->slat_stat, msec);
543}
fd1ae4c9 544
645785e5 545static void add_bw_sample(struct thread_data *td)
57d753e3
JA
546{
547 unsigned long spent = mtime_since_now(&td->stat_sample_time);
548 unsigned long rate;
549
1d035750 550 if (spent < td->bw_avg_time)
57d753e3
JA
551 return;
552
49d2caab 553 rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
57d753e3
JA
554 add_stat_sample(td, &td->bw_stat, rate);
555
a0a9b35b
JA
556 if (td->bw_log)
557 add_log_sample(td, td->bw_log, rate);
558
57d753e3 559 gettimeofday(&td->stat_sample_time, NULL);
49d2caab 560 td->stat_io_bytes = td->this_io_bytes;
892199bd
JA
561}
562
fd11d7af
JA
/*
 * busy looping version for the last few usec
 *
 * A zero or negative 'usec' returns at once. Without that check a negative
 * value would be converted to a huge unsigned number in the comparison
 * below and the loop would spin practically forever.
 */
static void __usec_sleep(int usec)
{
	struct timeval start;

	if (usec <= 0)
		return;

	gettimeofday(&start, NULL);
	while (utime_since_now(&start) < (unsigned long) usec)
		__asm__ __volatile__("rep;nop": : :"memory");
}
574
/*
 * Sleep for 'usec' microseconds.
 *
 * Anything shorter than 5000 usec is busy-waited with __usec_sleep();
 * longer waits use nanosleep() and are resumed with the remaining time if a
 * signal interrupts them.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	while (usec > 0) {
		if (usec < 5000) {
			__usec_sleep(usec);
			break;
		}

		/* tv_nsec must stay below one second's worth of nsec */
		req.tv_sec = usec / 1000000;
		req.tv_nsec = (long) (usec % 1000000) * 1000;
		rem.tv_sec = rem.tv_nsec = 0;

		if (!nanosleep(&req, &rem))
			break;
		if (errno != EINTR)
			break;

		/* interrupted: convert what is left back into usec */
		usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
	}
}
594
9e850933
JA
595static void rate_throttle(struct thread_data *td, unsigned long time_spent,
596 unsigned int bytes)
86184d14 597{
9e850933
JA
598 unsigned long usec_cycle;
599
4240cfa1
JA
600 if (!td->rate)
601 return;
602
9e850933
JA
603 usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
604
605 if (time_spent < usec_cycle) {
606 unsigned long s = usec_cycle - time_spent;
86184d14
JA
607
608 td->rate_pending_usleep += s;
fad86e6a 609 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
610 usec_sleep(td->rate_pending_usleep);
611 td->rate_pending_usleep = 0;
612 }
4240cfa1 613 } else {
9e850933 614 long overtime = time_spent - usec_cycle;
42b2b9fe 615
4240cfa1
JA
616 td->rate_pending_usleep -= overtime;
617 }
618}
619
5c24b2c4 620static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 621{
7607bc6b 622 unsigned long spent;
4240cfa1
JA
623 unsigned long rate;
624
625 /*
626 * allow a 2 second settle period in the beginning
627 */
7607bc6b 628 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
629 return 0;
630
631 /*
632 * if rate blocks is set, sample is running
633 */
49d2caab 634 if (td->rate_bytes) {
4240cfa1
JA
635 spent = mtime_since(&td->lastrate, now);
636 if (spent < td->ratecycle)
637 return 0;
638
49d2caab 639 rate = (td->this_io_bytes - td->rate_bytes) / spent;
4240cfa1
JA
640 if (rate < td->ratemin) {
641 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 642 if (rate_quit)
e6402082 643 sig_handler(0);
4240cfa1
JA
644 return 1;
645 }
86184d14 646 }
4240cfa1 647
49d2caab 648 td->rate_bytes = td->this_io_bytes;
4240cfa1
JA
649 memcpy(&td->lastrate, now, sizeof(*now));
650 return 0;
86184d14
JA
651}
652
67903a2e
JA
653static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
654{
01f79976
JA
655 if (!td->timeout)
656 return 0;
67903a2e
JA
657 if (mtime_since(&td->start, t) >= td->timeout * 1000)
658 return 1;
659
660 return 0;
661}
662
e8457004
JA
663static void fill_random_bytes(struct thread_data *td,
664 unsigned char *p, unsigned int len)
665{
645785e5 666 unsigned int todo;
40ef7f64 667 double r;
e8457004
JA
668
669 while (len) {
40ef7f64 670 drand48_r(&td->verify_state, &r);
e8457004 671
40ef7f64
JA
672 /*
673 * lrand48_r seems to be broken and only fill the bottom
674 * 32-bits, even on 64-bit archs with 64-bit longs
675 */
676 todo = sizeof(r);
e8457004
JA
677 if (todo > len)
678 todo = len;
679
680 memcpy(p, &r, todo);
681
682 len -= todo;
683 p += todo;
684 }
685}
686
9d0c6ca2
JA
/*
 * Print 'len' bytes of 'buffer' to stdout as two-digit lowercase hex,
 * followed by a newline.
 */
static void hexdump(void *buffer, int len)
{
	unsigned char *cur = buffer;
	int left;

	for (left = len; left > 0; left--)
		printf("%02x", *cur++);

	printf("\n");
}
696
645785e5 697static int verify_io_u(struct io_u *io_u)
e8457004
JA
698{
699 struct verify_header *hdr = (struct verify_header *) io_u->buf;
700 unsigned char *p = (unsigned char *) io_u->buf;
701 struct md5_ctx md5_ctx;
9d0c6ca2 702 int ret;
e8457004 703
840b216f 704 if (hdr->fio_magic != FIO_HDR_MAGIC)
e8457004
JA
705 return 1;
706
707 memset(&md5_ctx, 0, sizeof(md5_ctx));
708 p += sizeof(*hdr);
709 md5_update(&md5_ctx, p, hdr->len - sizeof(*hdr));
710
9d0c6ca2
JA
711 ret = memcmp(hdr->md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
712 if (ret) {
713 hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
714 hexdump(md5_ctx.hash, sizeof(md5_ctx.hash));
715 }
716
717 return ret;
e8457004
JA
718}
719
cfc702bd
JA
720/*
721 * fill body of io_u->buf with random data and add a header with the
722 * (eg) sha1sum of that data.
723 */
e8457004 724static void populate_io_u(struct thread_data *td, struct io_u *io_u)
cfc702bd 725{
e8457004
JA
726 struct md5_ctx md5_ctx;
727 struct verify_header hdr;
728 unsigned char *p = (unsigned char *) io_u->buf;
729
730 hdr.fio_magic = FIO_HDR_MAGIC;
731 hdr.len = io_u->buflen;
732 p += sizeof(hdr);
733 fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
734
735 memset(&md5_ctx, 0, sizeof(md5_ctx));
736 md5_update(&md5_ctx, p, io_u->buflen - sizeof(hdr));
737 memcpy(hdr.md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
738 memcpy(io_u->buf, &hdr, sizeof(hdr));
cfc702bd
JA
739}
740
2c83567e
JA
741static void put_io_u(struct thread_data *td, struct io_u *io_u)
742{
743 list_del(&io_u->list);
744 list_add(&io_u->list, &td->io_u_freelist);
745 td->cur_depth--;
746}
747
f0f3411b
JA
748#define queue_full(td) (list_empty(&(td)->io_u_freelist))
749
e8457004
JA
750static struct io_u *__get_io_u(struct thread_data *td)
751{
752 struct io_u *io_u;
753
f0f3411b 754 if (queue_full(td))
e8457004
JA
755 return NULL;
756
757 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
758 list_del(&io_u->list);
759 list_add(&io_u->list, &td->io_u_busylist);
f4bb2243 760 td->cur_depth++;
e8457004
JA
761 return io_u;
762}
763
2c83567e
JA
764static struct io_u *get_io_u(struct thread_data *td)
765{
766 struct io_u *io_u;
767
e8457004
JA
768 io_u = __get_io_u(td);
769 if (!io_u)
2c83567e
JA
770 return NULL;
771
406e7b7c
JA
772 if (get_next_offset(td, &io_u->offset)) {
773 put_io_u(td, io_u);
49d2caab 774 return NULL;
406e7b7c 775 }
49d2caab 776
b2a369fb
JA
777 io_u->buflen = get_next_buflen(td);
778 if (!io_u->buflen) {
e8457004 779 put_io_u(td, io_u);
7889f07b 780 return NULL;
e8457004 781 }
2c83567e 782
75b2ab2c
JA
783 if (io_u->buflen + io_u->offset > td->file_size)
784 io_u->buflen = td->file_size - io_u->offset;
49d2caab
JA
785
786 if (!td->sequential)
787 mark_random_map(td, io_u);
788
789 td->last_bytes += io_u->buflen;
790
9d0c6ca2 791 if (td->verify)
e8457004 792 populate_io_u(td, io_u);
cfc702bd 793
2c83567e
JA
794 if (td->use_aio) {
795 if (td_read(td))
796 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
797 else
798 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
799 }
800
57d753e3 801 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
802 return io_u;
803}
804
40ef7f64
JA
805static inline void td_set_runstate(struct thread_data *td, int runstate)
806{
807 td->old_runstate = td->runstate;
808 td->runstate = runstate;
809}
810
645785e5
JA
811static int get_next_verify(struct thread_data *td,
812 unsigned long long *offset, unsigned int *len)
813{
814 struct io_piece *ipo;
815
816 if (list_empty(&td->io_hist_list))
817 return 1;
818
819 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
820 list_del(&ipo->list);
821
822 *offset = ipo->offset;
823 *len = ipo->len;
824 free(ipo);
825 return 0;
826}
827
9d0c6ca2
JA
828static void prune_io_piece_log(struct thread_data *td)
829{
830 struct io_piece *ipo;
831
832 while (!list_empty(&td->io_hist_list)) {
833 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
834
835 list_del(&ipo->list);
836 free(ipo);
837 }
838}
839
9d0c6ca2
JA
840/*
841 * log a succesful write, so we can unwind the log for verify
842 */
843static void log_io_piece(struct thread_data *td, struct io_u *io_u)
844{
49d2caab 845 struct io_piece *ipo = malloc(sizeof(struct io_piece));
9d0c6ca2
JA
846 struct list_head *entry;
847
848 INIT_LIST_HEAD(&ipo->list);
849 ipo->offset = io_u->offset;
850 ipo->len = io_u->buflen;
851
49d2caab
JA
852 /*
853 * for random io where the writes extend the file, it will typically
854 * be laid out with the block scattered as written. it's faster to
855 * read them in in that order again, so don't sort
856 */
857 if (td->sequential || !td->overwrite) {
9d0c6ca2
JA
858 list_add_tail(&ipo->list, &td->io_hist_list);
859 return;
860 }
861
862 /*
863 * for random io, sort the list so verify will run faster
864 */
865 entry = &td->io_hist_list;
866 while ((entry = entry->prev) != &td->io_hist_list) {
867 struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
868
9d0c6ca2
JA
869 if (__ipo->offset < ipo->offset)
870 break;
871 }
872
873 list_add(&ipo->list, entry);
874}
875
91fc5dc9 876static void do_sync_verify(struct thread_data *td)
cfc702bd 877{
40ef7f64 878 struct timeval t;
e8457004 879 struct io_u *io_u = NULL;
645785e5 880 int ret;
e8457004 881
40ef7f64 882 td_set_runstate(td, TD_VERIFYING);
e8457004
JA
883
884 io_u = __get_io_u(td);
885
40ef7f64 886 if (!td->odirect) {
49d2caab 887 if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
40ef7f64
JA
888 td->error = errno;
889 goto out;
890 }
891 }
892
e8457004
JA
893 do {
894 if (td->terminate)
895 break;
40ef7f64
JA
896
897 gettimeofday(&t, NULL);
898 if (runtime_exceeded(td, &t))
899 break;
900
645785e5
JA
901 if (get_next_verify(td, &io_u->offset, &io_u->buflen))
902 break;
903
904 if (td->cur_off != io_u->offset) {
905 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
906 td->error = errno;
907 break;
908 }
909 }
e8457004
JA
910
911 ret = read(td->fd, io_u->buf, io_u->buflen);
912 if (ret < (int) io_u->buflen) {
913 if (ret == -1) {
914 td->error = errno;
915 break;
916 } else if (!ret)
917 break;
918 else
919 io_u->buflen = ret;
920 }
921
645785e5 922 if (verify_io_u(io_u))
e8457004
JA
923 break;
924
645785e5 925 td->cur_off = io_u->offset + io_u->buflen;
e8457004
JA
926 } while (1);
927
928out:
40ef7f64 929 td_set_runstate(td, TD_RUNNING);
e8457004 930 put_io_u(td, io_u);
cfc702bd
JA
931}
932
43000118 933static void do_sync_io(struct thread_data *td)
892199bd 934{
7889f07b 935 unsigned long msec, usec;
e8457004 936 struct io_u *io_u = NULL;
2c83567e 937 struct timeval e;
892199bd 938
49d2caab 939 while (td->this_io_bytes < td->io_size) {
892199bd
JA
940 int ret;
941
942 if (td->terminate)
943 break;
944
2c83567e 945 io_u = get_io_u(td);
7889f07b
JA
946 if (!io_u)
947 break;
2c83567e 948
63a09e51
JA
949 if (td->cur_off != io_u->offset) {
950 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
951 td->error = errno;
952 break;
953 }
892199bd
JA
954 }
955
02983297 956 if (td_read(td))
2c83567e 957 ret = read(td->fd, io_u->buf, io_u->buflen);
892199bd 958 else
2c83567e 959 ret = write(td->fd, io_u->buf, io_u->buflen);
892199bd 960
2c83567e 961 if (ret < (int) io_u->buflen) {
892199bd
JA
962 if (ret == -1)
963 td->error = errno;
964 break;
965 }
966
62bb4285 967 if (td_write(td))
645785e5
JA
968 log_io_piece(td, io_u);
969
4240cfa1 970 td->io_blocks++;
49d2caab
JA
971 td->io_bytes += io_u->buflen;
972 td->this_io_bytes += io_u->buflen;
63a09e51 973 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1 974
86184d14
JA
975 gettimeofday(&e, NULL);
976
57d753e3 977 usec = utime_since(&io_u->start_time, &e);
86184d14 978
9e850933 979 rate_throttle(td, usec, io_u->buflen);
892199bd 980
4240cfa1
JA
981 if (check_min_rate(td, &e)) {
982 td->error = ENODATA;
983 break;
984 }
892199bd 985
4240cfa1 986 msec = usec / 1000;
57d753e3 987 add_clat_sample(td, msec);
645785e5 988 add_bw_sample(td);
67903a2e
JA
989
990 if (runtime_exceeded(td, &e))
991 break;
2c83567e 992
cdf92433 993 put_io_u(td, io_u);
e8457004 994 io_u = NULL;
cdf92433 995
e97712ed
JA
996 if (td->thinktime)
997 usec_sleep(td->thinktime);
998
cdf92433
JA
999 if (should_fsync(td) && td->fsync_blocks &&
1000 (td->io_blocks % td->fsync_blocks) == 0)
1001 fsync(td->fd);
892199bd
JA
1002 }
1003
e8457004
JA
1004 if (io_u)
1005 put_io_u(td, io_u);
1006
4240cfa1 1007 if (should_fsync(td))
892199bd 1008 fsync(td->fd);
892199bd 1009}
43000118 1010
1ad72b11
JA
1011static int io_u_getevents(struct thread_data *td, int min, int max,
1012 struct timespec *t)
1013{
1014 int r;
1015
1016 do {
1017 r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
1018 if (r != -EAGAIN && r != -EINTR)
1019 break;
1020 } while (1);
1021
1022 return r;
1023}
1024
2c83567e 1025static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 1026{
2c83567e 1027 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
1028 int ret;
1029
1030 do {
254605cd 1031 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
1032 if (ret == 1)
1033 return 0;
406e7b7c 1034 else if (ret == -EAGAIN)
56b0eff0 1035 usleep(100);
406e7b7c 1036 else if (ret == -EINTR)
a592bd33 1037 continue;
56b0eff0
JA
1038 else
1039 break;
1040 } while (1);
1041
a592bd33 1042 return ret;
56b0eff0
JA
1043}
1044
98168d55 1045#define iocb_time(iocb) ((unsigned long) (iocb)->data)
2c83567e
JA
1046#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
1047
f0f3411b 1048static int ios_completed(struct thread_data *td, int nr)
2c83567e
JA
1049{
1050 unsigned long msec;
1051 struct io_u *io_u;
1052 struct timeval e;
9e850933 1053 int i, bytes_done;
2c83567e 1054
f0f3411b 1055 gettimeofday(&e, NULL);
2c83567e 1056
9e850933 1057 for (i = 0, bytes_done = 0; i < nr; i++) {
2c83567e
JA
1058 io_u = ev_to_iou(td->aio_events + i);
1059
f0f3411b 1060 td->io_blocks++;
49d2caab
JA
1061 td->io_bytes += io_u->buflen;
1062 td->this_io_bytes += io_u->buflen;
8c033f93 1063
f0f3411b 1064 msec = mtime_since(&io_u->issue_time, &e);
2c83567e 1065
f0f3411b 1066 add_clat_sample(td, msec);
645785e5
JA
1067 add_bw_sample(td);
1068
62bb4285 1069 if (td_write(td))
645785e5 1070 log_io_piece(td, io_u);
2c83567e 1071
f4bb2243 1072 bytes_done += io_u->buflen;
2c83567e
JA
1073 put_io_u(td, io_u);
1074 }
9e850933
JA
1075
1076 return bytes_done;
2c83567e
JA
1077}
1078
1079static void cleanup_pending_aio(struct thread_data *td)
1080{
1081 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
1082 struct list_head *entry, *n;
1083 struct io_u *io_u;
1084 int r;
1085
1086 /*
1087 * get immediately available events, if any
1088 */
1ad72b11 1089 r = io_u_getevents(td, 0, td->cur_depth, &ts);
2c83567e 1090 if (r > 0)
f0f3411b 1091 ios_completed(td, r);
2c83567e
JA
1092
1093 /*
1094 * now cancel remaining active events
1095 */
1096 list_for_each_safe(entry, n, &td->io_u_busylist) {
1097 io_u = list_entry(entry, struct io_u, list);
1098
1099 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
1100 if (!r)
1101 put_io_u(td, io_u);
1102 }
1103
1104 if (td->cur_depth) {
1ad72b11 1105 r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
2c83567e 1106 if (r > 0)
f0f3411b 1107 ios_completed(td, r);
2c83567e
JA
1108 }
1109}
98168d55 1110
d32d9284
JA
/*
 * Verify the io_u held in *io_u, if there is one, then give it back to the
 * free list and clear *io_u.
 *
 * Returns the verify_io_u() result, or 0 when nothing was pending.
 */
static int async_do_verify(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *pending = *io_u;
	int ret;

	if (!pending)
		return 0;

	ret = verify_io_u(pending);
	put_io_u(td, pending);
	*io_u = NULL;

	return ret;
}
1124
91fc5dc9 1125static void do_async_verify(struct thread_data *td)
cfc702bd 1126{
f4bb2243 1127 struct timeval t;
d32d9284 1128 struct io_u *io_u, *v_io_u = NULL;
645785e5 1129 int ret;
f4bb2243
JA
1130
1131 td_set_runstate(td, TD_VERIFYING);
1132
f4bb2243
JA
1133 do {
1134 if (td->terminate)
1135 break;
1136
1137 gettimeofday(&t, NULL);
1138 if (runtime_exceeded(td, &t))
1139 break;
1140
1141 io_u = __get_io_u(td);
1142 if (!io_u)
1143 break;
1144
645785e5
JA
1145 if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
1146 put_io_u(td, io_u);
1147 break;
f4bb2243
JA
1148 }
1149
1150 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
1151 ret = io_u_queue(td, io_u);
1152 if (ret) {
1153 put_io_u(td, io_u);
1154 td->error = ret;
1155 break;
1156 }
1157
f0f3411b
JA
1158 /*
1159 * we have one pending to verify, do that while the next
1160 * we are doing io on the next one
1161 */
d32d9284
JA
1162 if (async_do_verify(td, &v_io_u))
1163 break;
f0f3411b 1164
1ad72b11 1165 ret = io_u_getevents(td, 1, 1, NULL);
f0f3411b
JA
1166 if (ret != 1) {
1167 if (ret < 0)
1168 td->error = ret;
f4bb2243
JA
1169 break;
1170 }
1171
f0f3411b 1172 v_io_u = ev_to_iou(td->aio_events);
f4bb2243 1173
645785e5 1174 td->cur_off = v_io_u->offset + v_io_u->buflen;
f0f3411b
JA
1175
1176 /*
d32d9284 1177 * if we can't submit more io, we need to verify now
f0f3411b 1178 */
d32d9284
JA
1179 if (queue_full(td) && async_do_verify(td, &v_io_u))
1180 break;
1181
f4bb2243
JA
1182 } while (1);
1183
d32d9284 1184 async_do_verify(td, &v_io_u);
f0f3411b 1185
f4bb2243
JA
1186 if (td->cur_depth)
1187 cleanup_pending_aio(td);
1188
1189 td_set_runstate(td, TD_RUNNING);
cfc702bd
JA
1190}
1191
43000118
JA
1192static void do_async_io(struct thread_data *td)
1193{
1194 struct timeval s, e;
7889f07b 1195 unsigned long usec;
43000118 1196
49d2caab 1197 while (td->this_io_bytes < td->io_size) {
43000118
JA
1198 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
1199 struct timespec *timeout;
2c83567e
JA
1200 int ret, min_evts = 0;
1201 struct io_u *io_u;
9e850933 1202 unsigned int bytes_done;
43000118
JA
1203
1204 if (td->terminate)
1205 break;
1206
2c83567e 1207 io_u = get_io_u(td);
7889f07b
JA
1208 if (!io_u)
1209 break;
43000118 1210
57d753e3 1211 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 1212
2c83567e 1213 ret = io_u_queue(td, io_u);
56b0eff0 1214 if (ret) {
a3fdb993 1215 put_io_u(td, io_u);
a592bd33 1216 td->error = ret;
43000118
JA
1217 break;
1218 }
1219
57d753e3
JA
1220 gettimeofday(&io_u->issue_time, NULL);
1221 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
2c83567e 1222 if (td->cur_depth < td->aio_depth) {
43000118
JA
1223 timeout = &ts;
1224 min_evts = 0;
1225 } else {
1226 timeout = NULL;
1227 min_evts = 1;
1228 }
1229
1ad72b11 1230 ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
43000118 1231 if (ret < 0) {
406e7b7c 1232 td->error = ret;
43000118
JA
1233 break;
1234 } else if (!ret)
1235 continue;
1236
f0f3411b 1237 bytes_done = ios_completed(td, ret);
43000118 1238
98168d55
JA
1239 /*
1240 * the rate is batched for now, it should work for batches
1241 * of completions except the very first one which may look
1242 * a little bursty
1243 */
2c83567e 1244 gettimeofday(&e, NULL);
43000118
JA
1245 usec = utime_since(&s, &e);
1246
9e850933 1247 rate_throttle(td, usec, bytes_done);
43000118
JA
1248
1249 if (check_min_rate(td, &e)) {
1250 td->error = ENODATA;
1251 break;
1252 }
67903a2e
JA
1253
1254 if (runtime_exceeded(td, &e))
1255 break;
765d9223
JA
1256
1257 if (td->thinktime)
1258 usec_sleep(td->thinktime);
cdf92433
JA
1259
1260 if (should_fsync(td) && td->fsync_blocks &&
1261 (td->io_blocks % td->fsync_blocks) == 0)
1262 fsync(td->fd);
43000118 1263 }
56b0eff0 1264
2c83567e
JA
1265 if (td->cur_depth)
1266 cleanup_pending_aio(td);
4ac89145
JA
1267
1268 if (should_fsync(td))
1269 fsync(td->fd);
56b0eff0
JA
1270}
1271
1272static void cleanup_aio(struct thread_data *td)
1273{
254605cd
JA
1274 io_destroy(td->aio_ctx);
1275
43000118
JA
1276 if (td->aio_events)
1277 free(td->aio_events);
43000118
JA
1278}
1279
1280static int init_aio(struct thread_data *td)
1281{
254605cd 1282 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
1283 td->error = errno;
1284 return 1;
1285 }
1286
43000118 1287 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
1288 return 0;
1289}
1290
2c83567e
JA
1291static void cleanup_io_u(struct thread_data *td)
1292{
1293 struct list_head *entry, *n;
1294 struct io_u *io_u;
1295
1296 list_for_each_safe(entry, n, &td->io_u_freelist) {
1297 io_u = list_entry(entry, struct io_u, list);
1298
1299 list_del(&io_u->list);
2c83567e
JA
1300 free(io_u);
1301 }
6b71c826 1302
99c6704f
JA
1303 if (td->mem_type == MEM_MALLOC)
1304 free(td->orig_buffer);
1305 else if (td->mem_type == MEM_SHM) {
1306 struct shmid_ds sbuf;
1307
1308 shmdt(td->orig_buffer);
1309 shmctl(td->shm_id, IPC_RMID, &sbuf);
1310 }
2c83567e
JA
1311}
1312
99c6704f 1313static int init_io_u(struct thread_data *td)
2c83567e
JA
1314{
1315 struct io_u *io_u;
99c6704f 1316 int i, max_units, mem_size;
6b71c826 1317 char *p;
2c83567e
JA
1318
1319 if (!td->use_aio)
1320 max_units = 1;
1321 else
1322 max_units = td->aio_depth;
1323
7889f07b 1324 mem_size = td->max_bs * max_units + MASK;
99c6704f
JA
1325
1326 if (td->mem_type == MEM_MALLOC)
1327 td->orig_buffer = malloc(mem_size);
1328 else if (td->mem_type == MEM_SHM) {
1329 td->shm_id = shmget(IPC_PRIVATE, mem_size, IPC_CREAT | 0600);
1330 if (td->shm_id < 0) {
1331 td->error = errno;
1332 perror("shmget");
1333 return 1;
1334 }
1335
1336 td->orig_buffer = shmat(td->shm_id, NULL, 0);
1337 if (td->orig_buffer == (void *) -1) {
1338 td->error = errno;
1339 perror("shmat");
1340 return 1;
1341 }
1342 }
6b71c826 1343
2c83567e
JA
1344 INIT_LIST_HEAD(&td->io_u_freelist);
1345 INIT_LIST_HEAD(&td->io_u_busylist);
645785e5 1346 INIT_LIST_HEAD(&td->io_hist_list);
2c83567e 1347
99c6704f 1348 p = ALIGN(td->orig_buffer);
2c83567e
JA
1349 for (i = 0; i < max_units; i++) {
1350 io_u = malloc(sizeof(*io_u));
1351 memset(io_u, 0, sizeof(*io_u));
1352 INIT_LIST_HEAD(&io_u->list);
1353
7889f07b 1354 io_u->buf = p + td->max_bs * i;
2c83567e
JA
1355 list_add(&io_u->list, &td->io_u_freelist);
1356 }
99c6704f
JA
1357
1358 return 0;
2c83567e
JA
1359}
1360
a0a9b35b
JA
1361static void setup_log(struct io_log **log)
1362{
1363 struct io_log *l = malloc(sizeof(*l));
1364
1365 l->nr_samples = 0;
1366 l->max_samples = 1024;
1367 l->log = malloc(l->max_samples * sizeof(struct io_sample));
1368 *log = l;
1369}
1370
1371static void finish_log(struct thread_data *td, struct io_log *log, char *name)
1372{
1373 char file_name[128];
1374 FILE *f;
645785e5 1375 unsigned int i;
a0a9b35b
JA
1376
1377 sprintf(file_name, "client%d_%s.log", td->thread_number, name);
1378 f = fopen(file_name, "w");
1379 if (!f) {
1380 perror("fopen log");
1381 return;
1382 }
1383
1384 for (i = 0; i < log->nr_samples; i++)
1385 fprintf(f, "%lu, %lu\n", log->log[i].time, log->log[i].val);
1386
1387 fclose(f);
1388 free(log->log);
1389 free(log);
1390}
1391
02983297
JA
1392static int create_file(struct thread_data *td)
1393{
7889f07b 1394 unsigned long long left;
645785e5 1395 unsigned int bs;
02983297 1396 char *b;
645785e5 1397 int r;
02983297 1398
02983297
JA
1399 /*
1400 * unless specifically asked for overwrite, let normal io extend it
1401 */
62bb4285 1402 if (td_write(td) && !td->overwrite)
02983297
JA
1403 return 0;
1404
57d753e3
JA
1405 if (!td->file_size) {
1406 fprintf(stderr, "Need size for create\n");
1407 td->error = EINVAL;
1408 return 1;
1409 }
1410
42fd89a7
JA
1411 printf("Client%d: Laying out IO file\n", td->thread_number);
1412
02983297
JA
1413 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1414 if (td->fd < 0) {
1415 td->error = errno;
1416 return 1;
1417 }
1418
c94deb1c
JA
1419 if (ftruncate(td->fd, td->file_size) == -1) {
1420 td->error = errno;
1421 return 1;
1422 }
1423
49d2caab 1424 td->io_size = td->file_size;
7889f07b
JA
1425 b = malloc(td->max_bs);
1426 memset(b, 0, td->max_bs);
1427
1428 left = td->file_size;
1429 while (left) {
1430 bs = td->max_bs;
1431 if (bs > left)
1432 bs = left;
02983297 1433
7889f07b 1434 r = write(td->fd, b, bs);
02983297 1435
645785e5 1436 if (r == (int) bs) {
7889f07b 1437 left -= bs;
02983297 1438 continue;
7889f07b 1439 } else {
02983297
JA
1440 if (r < 0)
1441 td->error = errno;
1442 else
1443 td->error = EIO;
1444
1445 break;
1446 }
1447 }
1448
fc097bfe
JA
1449 if (td->create_fsync)
1450 fsync(td->fd);
1451
02983297
JA
1452 close(td->fd);
1453 td->fd = -1;
1454 free(b);
1455 return 0;
1456}
1457
1458static int file_exists(struct thread_data *td)
1459{
1460 struct stat st;
1461
1462 if (stat(td->file_name, &st) != -1)
1463 return 1;
1464
1465 return errno != ENOENT;
1466}
1467
c94deb1c 1468static int get_file_size(struct thread_data *td)
02983297 1469{
c94deb1c 1470 size_t bytes = 0;
02983297 1471 struct stat st;
c94deb1c
JA
1472
1473 if (fstat(td->fd, &st) == -1) {
1474 td->error = errno;
1475 return 1;
1476 }
1477
1478 /*
1479 * if block device, get size via BLKGETSIZE64 ioctl. try that as well
1480 * if this is a link, fall back to st.st_size if it fails
1481 */
1482 if (S_ISBLK(st.st_mode) || S_ISLNK(st.st_mode)) {
1483 if (ioctl(td->fd, BLKGETSIZE64, &bytes)) {
1484 if (S_ISBLK(st.st_mode)) {
1485 td->error = errno;
1486 return 1;
1487 } else
1488 bytes = st.st_size;
1489 }
1490 } else
1491 bytes = st.st_size;
1492
1493 if (td_read(td)) {
1494 if (td->file_size > bytes)
1495 bytes = td->file_size;
1496 } else {
1497 if (!td->file_size)
1498 td->file_size = 1024 * 1024 * 1024;
1499
1500 bytes = td->file_size;
1501 }
1502
1503 if (td->file_offset > bytes) {
1504 fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
1505 return 1;
1506 }
1507
1508 td->io_size = bytes - td->file_offset;
1509 if (td->io_size == 0) {
1510 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
1511 td->error = EINVAL;
1512 return 1;
1513 }
1514
1515 return 0;
1516}
1517
1518static int setup_file(struct thread_data *td)
1519{
02983297
JA
1520 int flags = 0;
1521
1522 if (!file_exists(td)) {
1523 if (!td->create_file) {
1524 td->error = ENOENT;
1525 return 1;
1526 }
1527 if (create_file(td))
1528 return 1;
1529 }
1530
1531 if (td->odirect)
1532 flags |= O_DIRECT;
1533
1534 if (td_read(td))
1535 td->fd = open(td->file_name, flags | O_RDONLY);
1536 else {
1537 if (!td->overwrite)
1538 flags |= O_TRUNC;
74b4b5fb
JA
1539 if (td->sync_io)
1540 flags |= O_SYNC;
e8457004
JA
1541 if (td->verify)
1542 flags |= O_RDWR;
1543 else
1544 flags |= O_WRONLY;
02983297 1545
e8457004 1546 td->fd = open(td->file_name, flags | O_CREAT, 0600);
02983297
JA
1547 }
1548
1549 if (td->fd == -1) {
1550 td->error = errno;
1551 return 1;
1552 }
1553
c94deb1c 1554 if (get_file_size(td))
49d2caab 1555 return 1;
49d2caab 1556
62bb4285 1557 if (td_write(td) && ftruncate(td->fd, td->file_size) == -1) {
c94deb1c 1558 td->error = errno;
02983297
JA
1559 return 1;
1560 }
1561
b95799ca 1562 if (td->invalidate_cache) {
c94deb1c 1563 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
b95799ca
JA
1564 td->error = errno;
1565 return 1;
1566 }
1567 }
1568
02983297
JA
1569 return 0;
1570}
1571
d32d9284
JA
1572static void clear_io_state(struct thread_data *td)
1573{
9d0c6ca2
JA
1574 if (!td->use_aio)
1575 lseek(td->fd, SEEK_SET, 0);
1576
d32d9284 1577 td->cur_off = 0;
49d2caab
JA
1578 td->last_bytes = 0;
1579 td->stat_io_bytes = 0;
1580 td->this_io_bytes = 0;
1581
1582 if (td->file_map)
1583 memset(td->file_map, 0, td->num_maps * sizeof(long));
d32d9284
JA
1584}
1585
5c24b2c4 1586static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
1587{
1588 struct thread_data *td;
02983297 1589 int ret = 1;
2c83567e 1590 void *data;
892199bd 1591
7292613b
JA
1592 setsid();
1593
892199bd 1594 data = shmat(shm_id, NULL, 0);
4ac89145
JA
1595 if (data == (void *) -1) {
1596 perror("shmat");
1597 return NULL;
1598 }
1599
892199bd
JA
1600 td = data + offset * sizeof(struct thread_data);
1601 td->pid = getpid();
1602
99c6704f
JA
1603 if (init_io_u(td))
1604 goto err;
2c83567e 1605
18e0b78c
JA
1606 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1607 td->error = errno;
1608 goto err;
1609 }
1610
4240cfa1 1611 sprintf(argv[0], "fio%d", offset);
892199bd 1612
43000118
JA
1613 if (td->use_aio && init_aio(td))
1614 goto err;
1615
f737299d 1616 if (td->ioprio) {
892199bd
JA
1617 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1618 td->error = errno;
599002b3 1619 goto err;
892199bd
JA
1620 }
1621 }
1622
1623 sem_post(&startup_sem);
1624 sem_wait(&td->mutex);
43000118 1625
fc097bfe
JA
1626 if (!td->create_serialize && setup_file(td))
1627 goto err;
1628
49d2caab
JA
1629 if (init_random_state(td))
1630 goto err;
1631
7292613b
JA
1632 gettimeofday(&td->start, NULL);
1633
92b229ed
JA
1634 getrusage(RUSAGE_SELF, &td->ru_start);
1635
293753bb
JA
1636 while (td->loops--) {
1637 gettimeofday(&td->stat_sample_time, NULL);
1638
1639 if (td->ratemin)
1640 memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
7292613b 1641
d32d9284 1642 clear_io_state(td);
9d0c6ca2 1643 prune_io_piece_log(td);
fd1ae4c9 1644
b2de0ed2 1645 if (!td->use_aio)
b6794fbf 1646 do_sync_io(td);
b2de0ed2
JA
1647 else
1648 do_async_io(td);
1649
91fc5dc9
JA
1650 if (td->error)
1651 break;
1652
b2de0ed2
JA
1653 if (!td->verify)
1654 continue;
cfc702bd 1655
b2de0ed2 1656 clear_io_state(td);
d32d9284 1657
91fc5dc9
JA
1658 if (!td->use_aio)
1659 do_sync_verify(td);
1660 else
1661 do_async_verify(td);
1662
1663 if (td->error)
1664 break;
b6794fbf 1665 }
7292613b 1666
be33abe4 1667 td->runtime = mtime_since_now(&td->start);
92b229ed 1668 getrusage(RUSAGE_SELF, &td->ru_end);
892199bd 1669 ret = 0;
a0a9b35b
JA
1670
1671 if (td->bw_log)
1672 finish_log(td, td->bw_log, "bw");
1673 if (td->lat_log)
1674 finish_log(td, td->lat_log, "lat");
4ac89145 1675
98dd52d6
JA
1676 if (exitall_on_terminate)
1677 sig_handler(0);
1678
892199bd 1679err:
7292613b
JA
1680 if (td->fd != -1) {
1681 close(td->fd);
1682 td->fd = -1;
1683 }
4ac89145
JA
1684 if (td->use_aio)
1685 cleanup_aio(td);
2c83567e 1686 cleanup_io_u(td);
599002b3 1687 if (ret) {
892199bd 1688 sem_post(&startup_sem);
599002b3
JA
1689 sem_wait(&td->mutex);
1690 }
40ef7f64 1691 td_set_runstate(td, TD_EXITED);
4240cfa1 1692 shmdt(data);
892199bd
JA
1693 return NULL;
1694}
1695
5c24b2c4 1696static void free_shm(void)
892199bd 1697{
c269123b
JA
1698 struct shmid_ds sbuf;
1699
1700 if (threads) {
1701 shmdt(threads);
1702 threads = NULL;
1703 shmctl(shm_id, IPC_RMID, &sbuf);
1704 }
892199bd
JA
1705}
1706
57d753e3
JA
1707static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1708 double *mean, double *dev)
1709{
1710 double n;
1711
1712 if (is->samples == 0)
1713 return 0;
1714
1715 *min = is->min_val;
1716 *max = is->max_val;
1717
1718 n = (double) is->samples;
1719 *mean = (double) is->val / n;
1720 *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
1721 return 1;
1722}
1723
5c24b2c4 1724static void show_thread_status(struct thread_data *td)
892199bd
JA
1725{
1726 int prio, prio_class;
92b229ed
JA
1727 unsigned long min, max, bw = 0, ctx;
1728 double mean, dev, usr_cpu, sys_cpu;
892199bd 1729
49d2caab 1730 if (!td->io_bytes && !td->error)
213b446c
JA
1731 return;
1732
892199bd 1733 if (td->runtime)
49d2caab 1734 bw = td->io_bytes / td->runtime;
892199bd
JA
1735
1736 prio = td->ioprio & 0xff;
1737 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1738
49d2caab 1739 printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->error, td->io_bytes >> 20, bw, td->runtime);
fd1ae4c9 1740
57d753e3
JA
1741 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1742 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1743 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1744 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1745 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev))
1746 printf(" bw (KiB/s) : min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
92b229ed
JA
1747
1748 if (td->runtime) {
1749 unsigned long t;
1750
1751 t = mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
1752 usr_cpu = (double) t * 100 / (double) td->runtime;
1753
1754 t = mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
1755 sys_cpu = (double) t * 100 / (double) td->runtime;
1756 } else {
1757 usr_cpu = 0;
1758 sys_cpu = 0;
1759 }
1760
1761 ctx = td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
1762
1763 printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, ctx);
892199bd
JA
1764}
1765
5c24b2c4 1766static int setup_rate(struct thread_data *td)
86184d14 1767{
4240cfa1
JA
1768 int nr_reads_per_sec;
1769
1770 if (!td->rate)
1771 return 0;
1772
1773 if (td->rate < td->ratemin) {
1774 fprintf(stderr, "min rate larger than nominal rate\n");
1775 return -1;
1776 }
86184d14 1777
49d2caab 1778 nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
86184d14
JA
1779 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
1780 td->rate_pending_usleep = 0;
4240cfa1 1781 return 0;
86184d14
JA
1782}
1783
fe74f555 1784static struct thread_data *get_new_job(int global, struct thread_data *parent)
892199bd 1785{
4240cfa1
JA
1786 struct thread_data *td;
1787
47d45203
JA
1788 if (global)
1789 return &def_thread;
8867c0a8 1790 if (thread_number >= max_jobs)
4240cfa1
JA
1791 return NULL;
1792
1793 td = &threads[thread_number++];
fc24389f 1794 memset(td, 0, sizeof(*td));
892199bd 1795
fe74f555
JA
1796 sprintf(td->directory, ".");
1797
e4ed35c3 1798 td->fd = -1;
86184d14 1799 td->thread_number = thread_number;
76cb7b42 1800
fe74f555
JA
1801 td->ddir = parent->ddir;
1802 td->ioprio = parent->ioprio;
1803 td->sequential = parent->sequential;
1804 td->bs = parent->bs;
1805 td->min_bs = parent->min_bs;
1806 td->max_bs = parent->max_bs;
1807 td->odirect = parent->odirect;
1808 td->thinktime = parent->thinktime;
1809 td->fsync_blocks = parent->fsync_blocks;
1810 td->start_delay = parent->start_delay;
1811 td->timeout = parent->timeout;
1812 td->use_aio = parent->use_aio;
1813 td->create_file = parent->create_file;
1814 td->overwrite = parent->overwrite;
1815 td->invalidate_cache = parent->invalidate_cache;
1816 td->file_size = parent->file_size;
1817 td->file_offset = parent->file_offset;
1818 td->rate = parent->rate;
1819 td->ratemin = parent->ratemin;
1820 td->ratecycle = parent->ratecycle;
1821 td->aio_depth = parent->aio_depth;
1822 td->sync_io = parent->sync_io;
1823 td->mem_type = parent->mem_type;
1824 td->bw_avg_time = parent->bw_avg_time;
1825 td->create_serialize = parent->create_serialize;
1826 td->create_fsync = parent->create_fsync;
1827 td->loops = parent->loops;
1828 td->verify = parent->verify;
1829 td->stonewall = parent->stonewall;
1830 td->numjobs = parent->numjobs;
1831 memcpy(&td->cpumask, &parent->cpumask, sizeof(td->cpumask));
f737299d
JA
1832
1833 return td;
1834}
1835
4240cfa1
JA
1836static void put_job(struct thread_data *td)
1837{
1838 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
1839 thread_number--;
1840}
1841
fe74f555 1842static int add_job(struct thread_data *td, const char *jobname, int prioclass,
5c24b2c4 1843 int prio)
f737299d 1844{
fe74f555
JA
1845 int numjobs;
1846
47d45203
JA
1847 if (td == &def_thread)
1848 return 0;
1849
fe74f555 1850 sprintf(td->file_name, "%s/%s.%d", td->directory, jobname, td->thread_number);
4240cfa1 1851 sem_init(&td->mutex, 1, 0);
f737299d
JA
1852 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
1853
57d753e3
JA
1854 td->clat_stat.min_val = ULONG_MAX;
1855 td->slat_stat.min_val = ULONG_MAX;
1856 td->bw_stat.min_val = ULONG_MAX;
1857
8dbff0b1
JA
1858 run_str[td->thread_number - 1] = 'P';
1859
4ac89145
JA
1860 if (td->use_aio && !td->aio_depth)
1861 td->aio_depth = 1;
43000118 1862
645785e5 1863 if (td->min_bs == -1U)
8bdcfab5 1864 td->min_bs = td->bs;
645785e5 1865 if (td->max_bs == -1U)
8bdcfab5 1866 td->max_bs = td->bs;
645785e5 1867 if (td_read(td))
840b216f 1868 td->verify = 0;
8bdcfab5 1869
4240cfa1 1870 if (setup_rate(td))
fe74f555 1871 goto err;
f737299d 1872
a0a9b35b
JA
1873 if (write_lat_log)
1874 setup_log(&td->lat_log);
1875 if (write_bw_log)
1876 setup_log(&td->bw_log);
1877
fe74f555
JA
1878 printf("Client%d: rw=%d, prio=%d/%d, seq=%d, odir=%d, bs=%d-%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, td->ddir, prioclass, prio, td->sequential, td->odirect, td->min_bs, td->max_bs, td->rate, td->use_aio, td->aio_depth);
1879
1880 /*
1881 * recurse add identical jobs, clear numjobs and stonewall options
1882 * as they don't apply to sub-jobs
1883 */
1884 numjobs = td->numjobs;
1885 while (--numjobs) {
1886 struct thread_data *td_new = get_new_job(0, td);
1887
1888 if (!td_new)
1889 break;
1890
1891 td_new->numjobs = 1;
1892 td_new->stonewall = 0;
1893
1894 if (add_job(td_new, jobname, prioclass, prio))
1895 break;
1896 }
4240cfa1 1897 return 0;
fe74f555
JA
1898err:
1899 put_job(td);
1900 return -1;
892199bd
JA
1901}
1902
18e0b78c
JA
/*
 * Translate the bits of 'cpu' into a cpu set: bit N set means cpu N is
 * included.
 *
 * NOTE(review): cpumask is passed by value, so the set built here never
 * reaches the caller. Fixing that needs a pointer parameter and a matching
 * change at the call site.
 */
static void fill_cpu_mask(cpu_set_t cpumask, int cpu)
{
	unsigned int i;

	CPU_ZERO(&cpumask);

	for (i = 0; i < sizeof(int) * 8; i++) {
		/* unsigned shift: 1 << 31 on a signed int is undefined */
		if ((1U << i) & (unsigned int) cpu)
			CPU_SET(i, &cpumask);
	}
}
1914
7889f07b
JA
/*
 * Map a size suffix to its multiplier: k/K = 1024, m/M = 1024^2,
 * g/G = 1024^3. Any other character yields 1.
 */
unsigned long get_mult(char c)
{
	if (c == 'k' || c == 'K')
		return 1024;
	if (c == 'm' || c == 'M')
		return 1024 * 1024;
	if (c == 'g' || c == 'G')
		return 1024 * 1024 * 1024;

	return 1;
}
1931
02983297
JA
/*
 * Convert the string after '=' into a decimal value, applying a size suffix
 * (see get_mult()) if one directly follows the number, optionally separated
 * from it by blanks.
 *
 * Returns 0 and stores the result in *val on success. Returns 1, leaving
 * *val untouched, when there is no '=', no number after it, or the number
 * is out of range.
 */
static int str_cnv(char *p, unsigned long long *val)
{
	unsigned long long v;
	char *str, *end;

	str = strstr(p, "=");
	if (!str)
		return 1;

	str++;

	/* errno is only meaningful if it was clear before the call */
	errno = 0;
	v = strtoull(str, &end, 10);
	if (end == str)
		return 1;
	if (v == ULLONG_MAX && errno == ERANGE)
		return 1;

	while (isblank((unsigned char) *end))
		end++;

	*val = v * get_mult(*end);
	return 0;
}
1954
02983297
JA
/*
 * If 'name' occurs anywhere in the line, convert the value after '=' with
 * str_cnv(). Returns 0 when a value was stored in *val, 1 otherwise.
 */
static int check_strcnv(char *p, char *name, unsigned long long *val)
{
	if (strstr(p, name) == NULL)
		return 1;

	return str_cnv(p, val);
}
1962
99c6704f
JA
/*
 * Test whether 'option' appears somewhere after the first occurrence of
 * 'name' in the line. Returns 0 on a match and 1 otherwise.
 */
static int check_str(char *p, char *name, char *option)
{
	char *rest = strstr(p, name);

	if (rest == NULL)
		return 1;

	rest += strlen(name);
	return strstr(rest, option) == NULL;
}
1976
fe74f555
JA
/*
 * If 'name' occurs in the line, copy the value after '=' into dest with
 * surrounding blanks and the trailing newline removed.
 *
 * Returns 0 when a value was stored, 1 when 'name' or '=' is missing. The
 * caller must make sure dest is large enough for the value.
 */
static int check_strstore(char *p, char *name, char *dest)
{
	char *s = strstr(p, name);
	size_t len;

	if (!s)
		return 1;

	s = strstr(p, "=");
	if (!s)
		return 1;

	s++;
	while (isblank((unsigned char) *s))
		s++;

	/* drop the line terminator and any other trailing whitespace */
	len = strlen(s);
	while (len && isspace((unsigned char) s[len - 1]))
		len--;

	memcpy(dest, s, len);
	dest[len] = '\0';
	return 0;
}
1995
7889f07b
JA
/*
 * Parse a "name=LOW-HIGH" option, with or without blanks around '=' and
 * with or without a size suffix on both numbers (e.g. "bsrange=4k-8k").
 *
 * Returns 0 with *s and *e filled in when the line matches, 1 otherwise.
 */
static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
{
	static const char *const suffixed[] = {
		"%s=%%lu%%c-%%lu%%c",
		"%s = %%lu%%c-%%lu%%c",
	};
	static const char *const plain[] = {
		"%s=%%lu-%%lu",
		"%s = %%lu-%%lu",
	};
	char fmt[128];
	char lo_sfx, hi_sfx;
	unsigned int i;

	/* both numbers carry a suffix character */
	for (i = 0; i < 2; i++) {
		sprintf(fmt, suffixed[i], name);
		if (sscanf(p, fmt, s, &lo_sfx, e, &hi_sfx) != 4)
			continue;

		*s *= get_mult(lo_sfx);
		*e *= get_mult(hi_sfx);
		return 0;
	}

	/* bare numbers */
	for (i = 0; i < 2; i++) {
		sprintf(fmt, plain[i], name);
		if (sscanf(p, fmt, s, e) == 2)
			return 0;
	}

	return 1;
}
2026
/*
 * Parse a "name=N" or "name = N" option that starts the line.
 *
 * Returns 0 with the number stored in *val on a match, 1 otherwise.
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char str[128];

	/* %u matches the unsigned int destination; it still accepts a sign */
	snprintf(str, sizeof(str), "%s=%%u", name);
	if (sscanf(p, str, val) == 1)
		return 0;

	snprintf(str, sizeof(str), "%s = %%u", name);
	if (sscanf(p, str, val) == 1)
		return 0;

	return 1;
}
2041
/*
 * Decide whether a job file line carries no option: it is empty, made up of
 * whitespace / control characters only, or its first other character is the
 * comment marker ';'.
 *
 * Returns 1 for such lines and 0 for lines with content.
 */
static int is_empty_or_comment(char *line)
{
	const unsigned char *c;

	/* walk to the terminator instead of calling strlen() on every pass */
	for (c = (const unsigned char *) line; *c; c++) {
		if (*c == ';')
			return 1;
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
2055
5c24b2c4 2056static int parse_jobs_ini(char *file)
7dd1389e 2057{
47d45203 2058 unsigned int prioclass, prio, cpu, global;
8c033f93 2059 unsigned long long ull;
7889f07b 2060 unsigned long ul1, ul2;
f737299d 2061 struct thread_data *td;
7dd1389e
JA
2062 char *string, *name;
2063 fpos_t off;
2064 FILE *f;
2065 char *p;
2066
2067 f = fopen(file, "r");
2068 if (!f) {
2069 perror("fopen");
4240cfa1 2070 return 1;
7dd1389e
JA
2071 }
2072
2073 string = malloc(4096);
2074 name = malloc(256);
2075
7dd1389e 2076 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b
JA
2077 if (is_empty_or_comment(p))
2078 continue;
7dd1389e
JA
2079 if (sscanf(p, "[%s]", name) != 1)
2080 continue;
2081
47d45203
JA
2082 global = !strncmp(name, "global", 6);
2083
7dd1389e
JA
2084 name[strlen(name) - 1] = '\0';
2085
fe74f555 2086 td = get_new_job(global, &def_thread);
4240cfa1
JA
2087 if (!td)
2088 break;
f737299d 2089
7dd1389e 2090 prioclass = 2;
f737299d 2091 prio = 4;
7dd1389e
JA
2092
2093 fgetpos(f, &off);
2094 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b 2095 if (is_empty_or_comment(p))
e6402082
JA
2096 continue;
2097 if (strstr(p, "["))
7dd1389e 2098 break;
f737299d 2099 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
2100 fgetpos(f, &off);
2101 continue;
2102 }
2103 if (!check_int(p, "prio", &prio)) {
2104 fgetpos(f, &off);
2105 continue;
2106 }
2107 if (!check_int(p, "prioclass", &prioclass)) {
2108 fgetpos(f, &off);
2109 continue;
2110 }
f737299d 2111 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
2112 fgetpos(f, &off);
2113 continue;
2114 }
f737299d 2115 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
2116 fgetpos(f, &off);
2117 continue;
2118 }
4240cfa1
JA
2119 if (!check_int(p, "ratemin", &td->ratemin)) {
2120 fgetpos(f, &off);
2121 continue;
2122 }
2123 if (!check_int(p, "ratecycle", &td->ratecycle)) {
2124 fgetpos(f, &off);
2125 continue;
2126 }
e97712ed 2127 if (!check_int(p, "thinktime", &td->thinktime)) {
7dd1389e
JA
2128 fgetpos(f, &off);
2129 continue;
2130 }
18e0b78c 2131 if (!check_int(p, "cpumask", &cpu)) {
f737299d 2132 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
2133 fgetpos(f, &off);
2134 continue;
2135 }
4240cfa1
JA
2136 if (!check_int(p, "fsync", &td->fsync_blocks)) {
2137 fgetpos(f, &off);
2138 continue;
2139 }
fc24389f
JA
2140 if (!check_int(p, "startdelay", &td->start_delay)) {
2141 fgetpos(f, &off);
2142 continue;
2143 }
67903a2e
JA
2144 if (!check_int(p, "timeout", &td->timeout)) {
2145 fgetpos(f, &off);
2146 continue;
2147 }
b95799ca
JA
2148 if (!check_int(p, "invalidate",&td->invalidate_cache)) {
2149 fgetpos(f, &off);
2150 continue;
2151 }
43000118
JA
2152 if (!check_int(p, "aio_depth", &td->aio_depth)) {
2153 fgetpos(f, &off);
2154 continue;
2155 }
74b4b5fb
JA
2156 if (!check_int(p, "sync", &td->sync_io)) {
2157 fgetpos(f, &off);
2158 continue;
2159 }
1d035750
JA
2160 if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
2161 fgetpos(f, &off);
2162 continue;
2163 }
fc097bfe
JA
2164 if (!check_int(p, "create_serialize", &td->create_serialize)) {
2165 fgetpos(f, &off);
2166 continue;
2167 }
2168 if (!check_int(p, "create_fsync", &td->create_fsync)) {
2169 fgetpos(f, &off);
2170 continue;
2171 }
b6794fbf
JA
2172 if (!check_int(p, "loops", &td->loops)) {
2173 fgetpos(f, &off);
2174 continue;
2175 }
e8457004
JA
2176 if (!check_int(p, "verify", &td->verify)) {
2177 fgetpos(f, &off);
2178 continue;
2179 }
fe74f555
JA
2180 if (!check_int(p, "numjobs", &td->numjobs)) {
2181 fgetpos(f, &off);
2182 continue;
2183 }
7889f07b 2184 if (!check_range(p, "bsrange", &ul1, &ul2)) {
7f27790b
JA
2185 if (ul1 & 511)
2186 printf("bad min block size, must be a multiple of 512\n");
2187 else
2188 td->min_bs = ul1;
2189 if (ul2 & 511)
2190 printf("bad max block size, must be a multiple of 512\n");
2191 else
2192 td->max_bs = ul2;
7889f07b
JA
2193 fgetpos(f, &off);
2194 continue;
2195 }
8c033f93 2196 if (!check_strcnv(p, "bs", &ull)) {
7f27790b
JA
2197 if (ull & 511)
2198 printf("bad block size, must be a multiple of 512\n");
2199 else
2200 td->bs = ull;
8c033f93
JA
2201 fgetpos(f, &off);
2202 continue;
2203 }
02983297
JA
2204 if (!check_strcnv(p, "size", &td->file_size)) {
2205 fgetpos(f, &off);
2206 continue;
2207 }
2208 if (!check_strcnv(p, "offset", &td->file_offset)) {
2209 fgetpos(f, &off);
2210 continue;
2211 }
fe74f555 2212 if (!check_strstore(p, "directory", td->directory)) {
fe74f555
JA
2213 fgetpos(f, &off);
2214 continue;
2215 }
99c6704f 2216 if (!check_str(p, "mem", "malloc")) {
99c6704f
JA
2217 td->mem_type = MEM_MALLOC;
2218 fgetpos(f, &off);
2219 continue;
2220 }
2221 if (!check_str(p, "mem", "shm")) {
99c6704f
JA
2222 td->mem_type = MEM_SHM;
2223 fgetpos(f, &off);
2224 continue;
2225 }
43000118 2226 if (!strncmp(p, "sequential", 10)) {
f737299d 2227 td->sequential = 1;
7dd1389e
JA
2228 fgetpos(f, &off);
2229 continue;
2230 }
43000118 2231 if (!strncmp(p, "random", 6)) {
f737299d 2232 td->sequential = 0;
7dd1389e
JA
2233 fgetpos(f, &off);
2234 continue;
2235 }
43000118
JA
2236 if (!strncmp(p, "aio", 3)) {
2237 td->use_aio = 1;
2238 fgetpos(f, &off);
2239 continue;
2240 }
02983297
JA
2241 if (!strncmp(p, "create", 6)) {
2242 td->create_file = 1;
2243 fgetpos(f, &off);
2244 continue;
2245 }
2246 if (!strncmp(p, "overwrite", 9)) {
2247 td->overwrite = 1;
2248 fgetpos(f, &off);
2249 continue;
2250 }
98dd52d6
JA
2251 if (!strncmp(p, "exitall", 7)) {
2252 exitall_on_terminate = 1;
2253 fgetpos(f, &off);
2254 continue;
2255 }
2a81240d
JA
2256 if (!strncmp(p, "stonewall", 9)) {
2257 td->stonewall = 1;
2258 fgetpos(f, &off);
2259 continue;
2260 }
e6402082 2261 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
2262 }
2263 fsetpos(f, &off);
2264
4240cfa1 2265 if (add_job(td, name, prioclass, prio))
fe74f555 2266 break;
7dd1389e
JA
2267 }
2268
2269 free(string);
2270 free(name);
fc7d63df 2271 fclose(f);
4240cfa1 2272 return 0;
7dd1389e
JA
2273}
2274
5c24b2c4 2275static int parse_options(int argc, char *argv[])
892199bd 2276{
01c4d8de 2277 int i;
892199bd
JA
2278
2279 for (i = 1; i < argc; i++) {
2280 char *parm = argv[i];
2281
2282 if (parm[0] != '-')
2283 break;
2284
2285 parm++;
2286 switch (*parm) {
2287 case 's':
2288 parm++;
47d45203 2289 def_thread.sequential = !!atoi(parm);
892199bd
JA
2290 break;
2291 case 'b':
2292 parm++;
47d45203
JA
2293 def_thread.bs = atoi(parm);
2294 def_thread.bs <<= 10;
2295 if (!def_thread.bs) {
4240cfa1 2296 printf("bad block size\n");
47d45203 2297 def_thread.bs = DEF_BS;
4240cfa1 2298 }
892199bd
JA
2299 break;
2300 case 't':
2301 parm++;
47d45203 2302 def_thread.timeout = atoi(parm);
892199bd 2303 break;
892199bd
JA
2304 case 'r':
2305 parm++;
2306 repeatable = !!atoi(parm);
2307 break;
02bdd9ba
JA
2308 case 'R':
2309 parm++;
2310 rate_quit = !!atoi(parm);
2311 break;
892199bd
JA
2312 case 'o':
2313 parm++;
47d45203 2314 def_thread.odirect = !!atoi(parm);
892199bd 2315 break;
7dd1389e
JA
2316 case 'f':
2317 if (i + 1 >= argc) {
2318 printf("-f needs file as arg\n");
2319 break;
2320 }
2321 ini_file = strdup(argv[i+1]);
a642279f 2322 i++;
7dd1389e 2323 break;
a0a9b35b
JA
2324 case 'l':
2325 write_lat_log = 1;
2326 break;
2327 case 'w':
2328 write_bw_log = 1;
2329 break;
892199bd 2330 default:
7dd1389e 2331 printf("bad option %s\n", argv[i]);
892199bd
JA
2332 break;
2333 }
2334 }
2335
892199bd
JA
2336 return i;
2337}
2338
3f39453a 2339static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 2340 int t_rate, int m_rate)
3f39453a 2341{
3f39453a
JA
2342 printf("Threads now running: %d", nr_running);
2343 if (m_rate || t_rate)
2344 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
2345 printf(" : [%s]\r", run_str);
2346 fflush(stdout);
3f39453a
JA
2347}
2348
40ef7f64
JA
2349static void check_str_update(struct thread_data *td, int n, int t, int m)
2350{
2351 char c = run_str[td->thread_number - 1];
2352
2353 if (td->runstate == td->old_runstate)
2354 return;
2355
2356 switch (td->runstate) {
2357 case TD_REAPED:
2358 c = '_';
2359 break;
f4bb2243
JA
2360 case TD_EXITED:
2361 c = 'E';
2362 break;
40ef7f64 2363 case TD_RUNNING:
af678352
JA
2364 if (td_read(td)) {
2365 if (td->sequential)
2366 c = 'R';
2367 else
2368 c = 'r';
2369 } else {
2370 if (td->sequential)
2371 c = 'W';
2372 else
2373 c = 'w';
2374 }
40ef7f64
JA
2375 break;
2376 case TD_VERIFYING:
2377 c = 'V';
2378 break;
2379 case TD_CREATED:
2380 c = 'C';
2381 break;
2382 case TD_NOT_CREATED:
2383 c = 'P';
2384 break;
2385 default:
2386 printf("state %d\n", td->runstate);
2387 }
2388
2389 run_str[td->thread_number - 1] = c;
2390 print_thread_status(td, n, t, m);
2391 td->old_runstate = td->runstate;
2392}
2393
213b446c 2394static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 2395{
213b446c 2396 int i;
02bdd9ba 2397
3f39453a
JA
2398 /*
2399 * reap exited threads (TD_EXITED -> TD_REAPED)
2400 */
02bdd9ba
JA
2401 for (i = 0; i < thread_number; i++) {
2402 struct thread_data *td = &threads[i];
2403
40ef7f64
JA
2404 check_str_update(td, *nr_running, *t_rate, *m_rate);
2405
213b446c
JA
2406 if (td->runstate != TD_EXITED)
2407 continue;
02bdd9ba 2408
40ef7f64 2409 td_set_runstate(td, TD_REAPED);
213b446c
JA
2410 waitpid(td->pid, NULL, 0);
2411 (*nr_running)--;
2412 (*m_rate) -= td->ratemin;
2413 (*t_rate) -= td->rate;
40ef7f64 2414 check_str_update(td, *nr_running, *t_rate, *m_rate);
e6402082
JA
2415
2416 if (td->terminate)
2417 continue;
213b446c 2418 }
02bdd9ba
JA
2419}
2420
fc24389f
JA
2421static void run_threads(char *argv[])
2422{
be33abe4 2423 struct timeval genesis;
fc24389f
JA
2424 struct thread_data *td;
2425 unsigned long spent;
2a81240d 2426 int i, todo, nr_running, m_rate, t_rate, nr_started;
fc24389f 2427
fc24389f
JA
2428 printf("Starting %d threads\n", thread_number);
2429 fflush(stdout);
2430
7292613b
JA
2431 signal(SIGINT, sig_handler);
2432
fc24389f 2433 todo = thread_number;
02bdd9ba 2434 nr_running = 0;
2a81240d 2435 nr_started = 0;
213b446c 2436 m_rate = t_rate = 0;
fc24389f 2437
8bdcfab5
JA
2438 for (i = 0; i < thread_number; i++) {
2439 td = &threads[i];
2440
fc097bfe
JA
2441 if (!td->create_serialize)
2442 continue;
2443
8bdcfab5
JA
2444 /*
2445 * do file setup here so it happens sequentially,
2446 * we don't want X number of threads getting their
2447 * client data interspersed on disk
2448 */
2449 if (setup_file(td)) {
40ef7f64 2450 td_set_runstate(td, TD_REAPED);
8bdcfab5
JA
2451 todo--;
2452 }
2453 }
2454
2455 gettimeofday(&genesis, NULL);
2456
213b446c 2457 while (todo) {
3f39453a
JA
2458 /*
2459 * create threads (TD_NOT_CREATED -> TD_CREATED)
2460 */
fc24389f
JA
2461 for (i = 0; i < thread_number; i++) {
2462 td = &threads[i];
2463
02bdd9ba 2464 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
2465 continue;
2466
213b446c
JA
2467 /*
2468 * never got a chance to start, killed by other
2469 * thread for some reason
2470 */
2471 if (td->terminate) {
2472 todo--;
2473 continue;
2474 }
2475
fc24389f 2476 if (td->start_delay) {
be33abe4 2477 spent = mtime_since_now(&genesis);
fc24389f
JA
2478
2479 if (td->start_delay * 1000 > spent)
2480 continue;
2481 }
2482
2a81240d 2483 if (td->stonewall && (nr_started || nr_running))
ea6f96a2 2484 break;
2a81240d 2485
40ef7f64
JA
2486 td_set_runstate(td, TD_CREATED);
2487 check_str_update(td, nr_running, t_rate, m_rate);
fc24389f
JA
2488 sem_init(&startup_sem, 1, 1);
2489 todo--;
2a81240d 2490 nr_started++;
fc24389f
JA
2491
2492 if (fork())
2493 sem_wait(&startup_sem);
2494 else {
2495 thread_main(shm_id, i, argv);
2496 exit(0);
2497 }
2498 }
2499
3f39453a 2500 /*
e8457004 2501 * start created threads (TD_CREATED -> TD_RUNNING)
3f39453a 2502 */
fc24389f
JA
2503 for (i = 0; i < thread_number; i++) {
2504 struct thread_data *td = &threads[i];
2505
3f39453a
JA
2506 if (td->runstate != TD_CREATED)
2507 continue;
2508
40ef7f64 2509 td_set_runstate(td, TD_RUNNING);
3f39453a 2510 nr_running++;
2a81240d 2511 nr_started--;
3f39453a
JA
2512 m_rate += td->ratemin;
2513 t_rate += td->rate;
40ef7f64 2514 check_str_update(td, nr_running, t_rate, m_rate);
3f39453a 2515 sem_post(&td->mutex);
fc24389f
JA
2516 }
2517
e8457004
JA
2518 for (i = 0; i < thread_number; i++) {
2519 struct thread_data *td = &threads[i];
2520
b48889bb
JA
2521 if (td->runstate != TD_RUNNING &&
2522 td->runstate != TD_VERIFYING)
e8457004
JA
2523 continue;
2524
40ef7f64 2525 check_str_update(td, nr_running, t_rate, m_rate);
e8457004
JA
2526 }
2527
213b446c 2528 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 2529
fc24389f
JA
2530 if (todo)
2531 usleep(100000);
2532 }
02bdd9ba
JA
2533
2534 while (nr_running) {
213b446c 2535 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
2536 usleep(10000);
2537 }
fc24389f
JA
2538}
2539
8867c0a8 2540int setup_thread_area(void)
892199bd 2541{
8867c0a8
JA
2542 /*
2543 * 1024 is too much on some machines, scale max_jobs if
2544 * we get a failure that looks like too large a shm segment
2545 */
2546 do {
2547 int s = max_jobs * sizeof(struct thread_data);
18e0b78c 2548
8867c0a8
JA
2549 shm_id = shmget(0, s, IPC_CREAT | 0600);
2550 if (shm_id != -1)
2551 break;
2552 if (errno != EINVAL) {
2553 perror("shmget");
2554 break;
2555 }
2556
2557 max_jobs >>= 1;
d4fac444 2558 } while (max_jobs);
8867c0a8
JA
2559
2560 if (shm_id == -1)
892199bd 2561 return 1;
892199bd
JA
2562
2563 threads = shmat(shm_id, NULL, 0);
8867c0a8 2564 if (threads == (void *) -1) {
86184d14
JA
2565 perror("shmat");
2566 return 1;
2567 }
892199bd
JA
2568
2569 atexit(free_shm);
8867c0a8
JA
2570 return 0;
2571}
2572
2573int main(int argc, char *argv[])
2574{
8c033f93 2575 static unsigned long max_run[2], min_run[2];
57d753e3 2576 static unsigned long max_bw[2], min_bw[2];
22334044 2577 static unsigned long io_mb[2], agg[2];
8867c0a8
JA
2578 int i;
2579
2580 if (setup_thread_area())
2581 return 1;
892199bd 2582
47d45203 2583 if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &def_thread.cpumask) == -1) {
4240cfa1
JA
2584 perror("sched_getaffinity");
2585 return 1;
2586 }
2587
47d45203
JA
2588 /*
2589 * fill globals
2590 */
2591 def_thread.ddir = DDIR_READ;
2592 def_thread.bs = DEF_BS;
7889f07b
JA
2593 def_thread.min_bs = -1;
2594 def_thread.max_bs = -1;
02983297 2595 def_thread.odirect = DEF_ODIRECT;
47d45203 2596 def_thread.ratecycle = DEF_RATE_CYCLE;
02983297 2597 def_thread.sequential = DEF_SEQUENTIAL;
47d45203 2598 def_thread.timeout = DEF_TIMEOUT;
02983297
JA
2599 def_thread.create_file = DEF_CREATE;
2600 def_thread.overwrite = DEF_OVERWRITE;
b95799ca 2601 def_thread.invalidate_cache = DEF_INVALIDATE;
99c6704f
JA
2602 def_thread.sync_io = DEF_SYNCIO;
2603 def_thread.mem_type = MEM_MALLOC;
1d035750 2604 def_thread.bw_avg_time = DEF_BWAVGTIME;
fc097bfe
JA
2605 def_thread.create_serialize = DEF_CREATE_SER;
2606 def_thread.create_fsync = DEF_CREATE_FSYNC;
b6794fbf 2607 def_thread.loops = DEF_LOOPS;
cfc702bd 2608 def_thread.verify = DEF_VERIFY;
2a81240d 2609 def_thread.stonewall = DEF_STONEWALL;
fe74f555 2610 def_thread.numjobs = DEF_NUMJOBS;
47d45203 2611
892199bd 2612 i = parse_options(argc, argv);
7dd1389e 2613
5961d92c
JA
2614 if (!ini_file) {
2615 printf("Need job file\n");
2616 return 1;
2617 }
2618
2619 if (parse_jobs_ini(ini_file))
2620 return 1;
7dd1389e 2621
4240cfa1
JA
2622 if (!thread_number) {
2623 printf("Nothing to do\n");
2624 return 1;
2625 }
7dd1389e 2626
fc24389f 2627 run_threads(argv);
892199bd 2628
892199bd
JA
2629 min_bw[0] = min_run[0] = ~0UL;
2630 min_bw[1] = min_run[1] = ~0UL;
22334044
JA
2631 io_mb[0] = io_mb[1] = 0;
2632 agg[0] = agg[1] = 0;
892199bd
JA
2633 for (i = 0; i < thread_number; i++) {
2634 struct thread_data *td = &threads[i];
2635 unsigned long bw = 0;
2636
22334044
JA
2637 if (!td->error) {
2638 if (td->runtime < min_run[td->ddir])
2639 min_run[td->ddir] = td->runtime;
2640 if (td->runtime > max_run[td->ddir])
2641 max_run[td->ddir] = td->runtime;
892199bd 2642
892199bd 2643 if (td->runtime)
49d2caab 2644 bw = td->io_bytes / td->runtime;
22334044
JA
2645 if (bw < min_bw[td->ddir])
2646 min_bw[td->ddir] = bw;
2647 if (bw > max_bw[td->ddir])
2648 max_bw[td->ddir] = bw;
2649
49d2caab 2650 io_mb[td->ddir] += td->io_bytes >> 20;
892199bd
JA
2651 }
2652
2653 show_thread_status(td);
2654 }
22334044
JA
2655
2656 if (max_run[0])
49d2caab 2657 agg[0] = (io_mb[0] * 1024 * 1000) / max_run[0];
22334044 2658 if (max_run[1])
49d2caab 2659 agg[1] = (io_mb[1] * 1024 * 1000) / max_run[1];
22334044 2660
57d753e3 2661 printf("\nRun status:\n");
892199bd 2662 if (max_run[DDIR_READ])
22334044 2663 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", io_mb[0], agg[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
892199bd 2664 if (max_run[DDIR_WRITE])
22334044 2665 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", io_mb[1], agg[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 2666
892199bd
JA
2667 return 0;
2668}