[PATCH] fio: improve timing on low number of usecs
[disktools.git] / fio.c
CommitLineData
abe4da87
JA
1/*
2 * fio - the flexible io tester
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
892199bd
JA
21#include <stdio.h>
22#include <stdlib.h>
23#include <unistd.h>
24#include <fcntl.h>
25#include <string.h>
26#include <errno.h>
27#include <signal.h>
28#include <time.h>
7dd1389e 29#include <ctype.h>
18e0b78c 30#include <sched.h>
43000118 31#include <libaio.h>
e128065d 32#include <math.h>
02983297 33#include <limits.h>
49d2caab 34#include <assert.h>
892199bd
JA
35#include <sys/time.h>
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/wait.h>
39#include <semaphore.h>
40#include <sys/ipc.h>
41#include <sys/shm.h>
c94deb1c 42#include <sys/ioctl.h>
892199bd 43#include <asm/unistd.h>
946d8870 44#include <asm/types.h>
49d2caab 45#include <asm/bitops.h>
892199bd 46
fd11d7af 47#include "arch.h"
2c83567e 48#include "list.h"
e8457004 49#include "md5.h"
2c83567e 50
c94deb1c
JA
/* fall back to a local definition when the system headers lack the ioctl */
#if !defined(BLKGETSIZE64)
#define BLKGETSIZE64	_IOR(0x12,114,size_t)
#endif

/* compile-time cap on the number of jobs */
#define MAX_JOBS	(1024)
56
892199bd
JA
57static int ioprio_set(int which, int who, int ioprio)
58{
59 return syscall(__NR_ioprio_set, which, who, ioprio);
60}
61
b95799ca
JA
62/*
63 * we want fadvise64 really, but it's so tangled... later
64 */
65static int fadvise(int fd, loff_t offset, size_t len, int advice)
66{
67#if 0
68 return syscall(__NR_fadvise64, fd, offset, offset >> 32, len, advice);
69#else
70 return posix_fadvise(fd, (off_t) offset, len, advice);
71#endif
72}
73
892199bd
JA
/* IOPRIO_WHO_* selector values, numbered from 1 */
enum {
	IOPRIO_WHO_PROCESS	= 1,
	IOPRIO_WHO_PGRP		= 2,
	IOPRIO_WHO_USER		= 3,
};
79
#define IOPRIO_CLASS_SHIFT	13

/* low bits cleared by ALIGN(): buffers are aligned to MASK + 1 bytes */
#define MASK			(4095)

/* built-in defaults for the job options */
#define DEF_BS			(4096)
#define DEF_TIMEOUT		(0)
#define DEF_RATE_CYCLE		(1000)
#define DEF_ODIRECT		(1)
#define DEF_SEQUENTIAL		(1)
#define DEF_RAND_REPEAT		(1)
#define DEF_OVERWRITE		(0)
#define DEF_CREATE		(1)
#define DEF_INVALIDATE		(1)
#define DEF_SYNCIO		(0)
#define DEF_RANDSEED		(0xb1899bedUL)
#define DEF_BWAVGTIME		(500)
#define DEF_CREATE_SER		(1)
#define DEF_CREATE_FSYNC	(1)
#define DEF_LOOPS		(1)
#define DEF_VERIFY		(0)
#define DEF_STONEWALL		(0)

/*
 * Round buf up to the next (MASK + 1)-byte boundary. The whole expansion
 * is parenthesized so the macro can be used inside larger expressions
 * (e.g. followed by an index or member operator) without the cast
 * binding to the wrong operand.
 */
#define ALIGN(buf)	((char *) (((unsigned long) (buf) + MASK) & ~(MASK)))
892199bd 103
4240cfa1 104static int repeatable = DEF_RAND_REPEAT;
02bdd9ba 105static int rate_quit = 1;
a0a9b35b
JA
106static int write_lat_log;
107static int write_bw_log;
98dd52d6 108static int exitall_on_terminate;
892199bd 109
892199bd 110static int thread_number;
7dd1389e 111static char *ini_file;
892199bd 112
8867c0a8
JA
113static int max_jobs = MAX_JOBS;
114
8dbff0b1
JA
115static char run_str[MAX_JOBS + 1];
116
892199bd
JA
117static int shm_id;
118
4240cfa1
JA
/* data direction of a job, compared against td->ddir */
enum {
	DDIR_READ	= 0,
	DDIR_WRITE	= 1,
};
892199bd 123
02bdd9ba
JA
/*
 * thread life cycle states, stored in td->runstate
 */
enum {
	TD_NOT_CREATED	= 0,
	TD_CREATED	= 1,
	TD_RUNNING	= 2,
	TD_VERIFYING	= 3,
	TD_EXITED	= 4,
	TD_REAPED	= 5,
};
135
99c6704f
JA
/* MEM_* values (presumably the choices for td->mem_type - confirm against the option parser) */
enum {
	MEM_MALLOC	= 0,
	MEM_SHM		= 1,
};
140
2c83567e
JA
141/*
142 * The io unit
143 */
144struct io_u {
145 struct iocb iocb;
57d753e3 146 struct timeval start_time;
2c83567e
JA
147 struct timeval issue_time;
148
2c83567e
JA
149 char *buf;
150 unsigned int buflen;
4ac89145 151 unsigned long long offset;
2c83567e
JA
152
153 struct list_head list;
154};
155
57d753e3
JA
/* running statistics, updated by add_stat_sample() */
struct io_stat {
	unsigned long val;	/* sum of all samples */
	unsigned long val_sq;	/* sum of squared samples */
	unsigned long max_val;	/* largest sample seen */
	unsigned long min_val;	/* smallest sample seen */
	unsigned long samples;	/* number of samples added */
};
163
a0a9b35b
JA
/* one logged value together with the time it was recorded */
struct io_sample {
	unsigned long time;	/* msec since the thread's start time */
	unsigned long val;	/* the logged value */
};
168
/* growable array of samples, appended to by add_log_sample() */
struct io_log {
	unsigned long nr_samples;	/* entries in use */
	unsigned long max_samples;	/* entries allocated */
	struct io_sample *log;		/* sample storage */
};
174
645785e5
JA
175struct io_piece {
176 struct list_head list;
177 unsigned long long offset;
178 unsigned int len;
179};
180
e8457004
JA
181#define FIO_HDR_MAGIC 0xf00baaef
182
183struct verify_header {
184 unsigned int fio_magic;
185 unsigned int len;
186 char md5_digest[MD5_HASH_WORDS * 4];
187};
188
/* direction tests on a thread_data pointer */
#define td_read(td)		((td)->ddir == DDIR_READ)
#define td_write(td)		((td)->ddir == DDIR_WRITE)
/* only buffered (non-O_DIRECT) writers are fsync'ed */
#define should_fsync(td)	(td_write(td) && !(td)->odirect)

/* the random map keeps one bit per block, packed into longs */
#define BLOCKS_PER_MAP		(8 * sizeof(long))
#define RAND_MAP_IDX(sector)	((sector) / BLOCKS_PER_MAP)
#define RAND_MAP_BIT(sector)	((sector) & (BLOCKS_PER_MAP - 1))
196
892199bd
JA
197struct thread_data {
198 char file_name[256];
199 int thread_number;
200 int error;
201 int fd;
892199bd 202 pid_t pid;
6b71c826 203 char *orig_buffer;
4240cfa1 204 volatile int terminate;
02bdd9ba 205 volatile int runstate;
40ef7f64 206 volatile int old_runstate;
f737299d
JA
207 unsigned int ddir;
208 unsigned int ioprio;
209 unsigned int sequential;
210 unsigned int bs;
8c033f93
JA
211 unsigned int min_bs;
212 unsigned int max_bs;
f737299d 213 unsigned int odirect;
e97712ed 214 unsigned int thinktime;
4240cfa1 215 unsigned int fsync_blocks;
fc24389f 216 unsigned int start_delay;
47d45203 217 unsigned int timeout;
43000118 218 unsigned int use_aio;
02983297
JA
219 unsigned int create_file;
220 unsigned int overwrite;
b95799ca 221 unsigned int invalidate_cache;
1d035750 222 unsigned int bw_avg_time;
fc097bfe
JA
223 unsigned int create_serialize;
224 unsigned int create_fsync;
b6794fbf 225 unsigned int loops;
02983297
JA
226 unsigned long long file_size;
227 unsigned long long file_offset;
74b4b5fb 228 unsigned int sync_io;
99c6704f 229 unsigned int mem_type;
cfc702bd 230 unsigned int verify;
2a81240d 231 unsigned int stonewall;
18e0b78c 232 cpu_set_t cpumask;
86184d14 233
7889f07b 234 struct drand48_data bsrange_state;
e8457004 235 struct drand48_data verify_state;
7889f07b 236
99c6704f
JA
237 int shm_id;
238
63a09e51
JA
239 off_t cur_off;
240
254605cd 241 io_context_t aio_ctx;
43000118 242 unsigned int aio_depth;
43000118 243 struct io_event *aio_events;
2c83567e
JA
244
245 unsigned int cur_depth;
246 struct list_head io_u_freelist;
247 struct list_head io_u_busylist;
43000118 248
7dd1389e 249 unsigned int rate;
4240cfa1
JA
250 unsigned int ratemin;
251 unsigned int ratecycle;
252 unsigned long rate_usec_cycle;
253 long rate_pending_usleep;
49d2caab 254 unsigned long rate_bytes;
4240cfa1 255 struct timeval lastrate;
86184d14 256
892199bd 257 unsigned long runtime; /* sec */
49d2caab 258 unsigned long long io_size;
e2ba35d9 259
4240cfa1 260 unsigned long io_blocks;
49d2caab
JA
261 unsigned long io_bytes;
262 unsigned long this_io_bytes;
263 unsigned long last_bytes;
892199bd 264 sem_t mutex;
49d2caab 265
892199bd 266 struct drand48_data random_state;
49d2caab
JA
267 unsigned long *file_map;
268 unsigned int num_maps;
892199bd
JA
269
270 /*
e128065d 271 * bandwidth and latency stats
892199bd 272 */
57d753e3
JA
273 struct io_stat clat_stat; /* completion latency */
274 struct io_stat slat_stat; /* submission latency */
275
276 struct io_stat bw_stat; /* bandwidth stats */
49d2caab 277 unsigned long stat_io_bytes;
fd1ae4c9 278 struct timeval stat_sample_time;
4240cfa1 279
a0a9b35b
JA
280 struct io_log *lat_log;
281 struct io_log *bw_log;
282
4240cfa1 283 struct timeval start;
92b229ed
JA
284 struct rusage ru_start;
285 struct rusage ru_end;
645785e5
JA
286
287 struct list_head io_hist_list;
892199bd
JA
288};
289
290static struct thread_data *threads;
47d45203 291static struct thread_data def_thread;
892199bd
JA
292
293static sem_t startup_sem;
294
5c24b2c4 295static void sig_handler(int sig)
892199bd
JA
296{
297 int i;
298
213b446c
JA
299 for (i = 0; i < thread_number; i++) {
300 struct thread_data *td = &threads[i];
301
302 td->terminate = 1;
303 td->start_delay = 0;
304 }
02bdd9ba
JA
305}
306
e8457004 307static int init_random_state(struct thread_data *td)
946d8870 308{
e8457004 309 unsigned long seed;
49d2caab 310 int fd, num_maps, blocks;
946d8870 311
e8457004
JA
312 fd = open("/dev/random", O_RDONLY);
313 if (fd == -1) {
314 td->error = errno;
315 return 1;
946d8870
JA
316 }
317
e8457004
JA
318 if (read(fd, &seed, sizeof(seed)) < (int) sizeof(seed)) {
319 td->error = EIO;
320 close(fd);
321 return 1;
946d8870
JA
322 }
323
e8457004 324 close(fd);
7889f07b
JA
325
326 srand48_r(seed, &td->bsrange_state);
e8457004 327 srand48_r(seed, &td->verify_state);
892199bd
JA
328
329 if (td->sequential)
330 return 0;
331
e8457004
JA
332 if (repeatable)
333 seed = DEF_RANDSEED;
892199bd 334
49d2caab
JA
335 blocks = (td->io_size + td->min_bs - 1) / td->min_bs;
336 num_maps = blocks / BLOCKS_PER_MAP;
337 td->file_map = malloc(num_maps * sizeof(long));
338 td->num_maps = num_maps;
339 memset(td->file_map, 0, num_maps * sizeof(long));
340
892199bd
JA
341 srand48_r(seed, &td->random_state);
342 return 0;
343}
344
/*
 * Microseconds elapsed from *s to *e. Returns 0 when *e is not later
 * than *s (e.g. the clock was stepped backwards) instead of converting a
 * negative value to unsigned.
 */
static unsigned long utime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - s->tv_sec;
	long long usec = (long long) e->tv_usec - s->tv_usec;

	/* borrow one second when the usec part went negative */
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	return sec * 1000000 + usec;
}
360
fd11d7af
JA
/* microseconds elapsed between *s and the current time of day */
static unsigned long utime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return utime_since(s, &now);
}
368
/*
 * Milliseconds (truncated) elapsed from *s to *e. Returns 0 when *e is
 * not later than *s instead of converting a negative value to unsigned.
 */
static unsigned long mtime_since(struct timeval *s, struct timeval *e)
{
	long long sec = (long long) e->tv_sec - s->tv_sec;
	long long usec = (long long) e->tv_usec - s->tv_usec;

	/* borrow one second when the usec part went negative */
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	return (sec * 1000000 + usec) / 1000;
}
385
be33abe4
JA
/* milliseconds elapsed between *s and the current time of day */
static unsigned long mtime_since_now(struct timeval *s)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return mtime_since(s, &now);
}
393
98168d55
JA
/* convert the timestamp *s into a millisecond count */
static inline unsigned long msec_now(struct timeval *s)
{
	unsigned long from_sec = s->tv_sec * 1000;
	unsigned long from_usec = s->tv_usec / 1000;

	return from_sec + from_usec;
}
398
49d2caab
JA
399static int random_map_free(struct thread_data *td, unsigned long long block)
400{
401 unsigned int idx = RAND_MAP_IDX(block);
402 unsigned int bit = RAND_MAP_BIT(block);
403
404 return (td->file_map[idx] & (1UL << bit)) == 0;
405}
406
407static int get_next_free_block(struct thread_data *td, unsigned long long *b)
892199bd 408{
49d2caab
JA
409 int i;
410
411 *b = 0;
412 i = 0;
413 while ((*b) * td->min_bs < td->io_size) {
414 if (td->file_map[i] != -1UL) {
415 *b += ffz(td->file_map[i]);
416 return 0;
417 }
418
419 *b += BLOCKS_PER_MAP;
420 i++;
421 }
422
423 return 1;
424}
425
426static void mark_random_map(struct thread_data *td, struct io_u *io_u)
427{
428 unsigned long block = io_u->offset / td->min_bs;
429 unsigned int blocks = 0;
430
431 while (blocks < (io_u->buflen / td->min_bs)) {
432 int idx, bit;
433
434 if (!random_map_free(td, block))
435 break;
436
437 idx = RAND_MAP_IDX(block);
438 bit = RAND_MAP_BIT(block);
439
440 assert(idx < td->num_maps);
441
442 td->file_map[idx] |= (1UL << bit);
443 block++;
444 blocks++;
445 }
446
447 if ((blocks * td->min_bs) < io_u->buflen)
448 io_u->buflen = blocks * td->min_bs;
449}
450
451static int get_next_offset(struct thread_data *td, unsigned long long *offset)
452{
453 unsigned long long b;
d32d9284 454 long r;
892199bd
JA
455
456 if (!td->sequential) {
49d2caab
JA
457 unsigned long max_blocks = td->io_size / td->min_bs;
458 int loops = 50;
459
460 do {
461 lrand48_r(&td->random_state, &r);
462 b = ((max_blocks - 1) * r / (RAND_MAX+1.0));
463 loops--;
464 } while (!random_map_free(td, b) && loops);
465
466 if (!loops) {
467 if (get_next_free_block(td, &b))
468 return 1;
469 }
7889f07b 470 } else
49d2caab 471 b = td->last_bytes / td->min_bs;
7889f07b 472
49d2caab
JA
473 *offset = (b * td->min_bs) + td->file_offset;
474 return 0;
7889f07b
JA
475}
476
477static unsigned int get_next_buflen(struct thread_data *td)
478{
479 unsigned int buflen;
d32d9284 480 long r;
7889f07b
JA
481
482 if (td->min_bs == td->max_bs)
483 buflen = td->min_bs;
484 else {
d32d9284 485 lrand48_r(&td->bsrange_state, &r);
7889f07b
JA
486 buflen = (1 + (double) (td->max_bs - 1) * r / (RAND_MAX + 1.0));
487 buflen = (buflen + td->min_bs - 1) & ~(td->min_bs - 1);
892199bd
JA
488 }
489
49d2caab
JA
490 if (buflen > td->io_size - td->this_io_bytes)
491 buflen = td->io_size - td->this_io_bytes;
7889f07b 492
7889f07b 493 return buflen;
892199bd
JA
494}
495
57d753e3
JA
496static inline void add_stat_sample(struct thread_data *td, struct io_stat *is,
497 unsigned long val)
892199bd 498{
57d753e3
JA
499 if (val > is->max_val)
500 is->max_val = val;
501 if (val < is->min_val)
502 is->min_val = val;
503
504 is->val += val;
505 is->val_sq += val * val;
506 is->samples++;
507}
fd1ae4c9 508
a0a9b35b
JA
509static void add_log_sample(struct thread_data *td, struct io_log *log,
510 unsigned long val)
511{
512 if (log->nr_samples == log->max_samples) {
513 int new_size = sizeof(struct io_sample) * log->max_samples * 2;
514
515 log->log = realloc(log->log, new_size);
516 log->max_samples <<= 1;
517 }
518
519 log->log[log->nr_samples].val = val;
520 log->log[log->nr_samples].time = mtime_since_now(&td->start);
521 log->nr_samples++;
522}
523
57d753e3
JA
524static void add_clat_sample(struct thread_data *td, unsigned long msec)
525{
526 add_stat_sample(td, &td->clat_stat, msec);
a0a9b35b
JA
527
528 if (td->lat_log)
529 add_log_sample(td, td->lat_log, msec);
57d753e3 530}
fd1ae4c9 531
57d753e3
JA
532static void add_slat_sample(struct thread_data *td, unsigned long msec)
533{
534 add_stat_sample(td, &td->slat_stat, msec);
535}
fd1ae4c9 536
645785e5 537static void add_bw_sample(struct thread_data *td)
57d753e3
JA
538{
539 unsigned long spent = mtime_since_now(&td->stat_sample_time);
540 unsigned long rate;
541
1d035750 542 if (spent < td->bw_avg_time)
57d753e3
JA
543 return;
544
49d2caab 545 rate = (td->this_io_bytes - td->stat_io_bytes) / spent;
57d753e3
JA
546 add_stat_sample(td, &td->bw_stat, rate);
547
a0a9b35b
JA
548 if (td->bw_log)
549 add_log_sample(td, td->bw_log, rate);
550
57d753e3 551 gettimeofday(&td->stat_sample_time, NULL);
49d2caab 552 td->stat_io_bytes = td->this_io_bytes;
892199bd
JA
553}
554
fd11d7af
JA
/*
 * busy looping version for the last few usec: spin until 'usec'
 * microseconds have passed since entry
 */
static void __usec_sleep(int usec)
{
	struct timeval begin;

	gettimeofday(&begin, NULL);

	for (;;) {
		if (!(utime_since_now(&begin) < usec))
			break;

		__asm__ __volatile__("rep;nop": : :"memory");
	}
}
566
/*
 * Sleep for 'usec' microseconds. Waits shorter than 5000 usec are busy
 * looped for accuracy; longer ones use nanosleep() and are resumed with
 * the remaining time when interrupted by a signal. Non-positive values
 * return immediately.
 */
static void usec_sleep(int usec)
{
	struct timespec req, rem;

	while (usec > 0) {
		if (usec < 5000) {
			__usec_sleep(usec);
			break;
		}

		/* tv_nsec must stay below one second, so split the request */
		req.tv_sec = usec / 1000000;
		req.tv_nsec = (long) (usec % 1000000) * 1000;
		rem.tv_sec = 0;
		rem.tv_nsec = 0;

		if (!nanosleep(&req, &rem) || errno != EINTR)
			break;

		/* interrupted: convert what is left back to microseconds */
		usec = rem.tv_sec * 1000000 + rem.tv_nsec / 1000;
	}
}
586
9e850933
JA
587static void rate_throttle(struct thread_data *td, unsigned long time_spent,
588 unsigned int bytes)
86184d14 589{
9e850933
JA
590 unsigned long usec_cycle;
591
4240cfa1
JA
592 if (!td->rate)
593 return;
594
9e850933
JA
595 usec_cycle = td->rate_usec_cycle * (bytes / td->min_bs);
596
597 if (time_spent < usec_cycle) {
598 unsigned long s = usec_cycle - time_spent;
86184d14
JA
599
600 td->rate_pending_usleep += s;
fad86e6a 601 if (td->rate_pending_usleep >= 100000) {
86184d14
JA
602 usec_sleep(td->rate_pending_usleep);
603 td->rate_pending_usleep = 0;
604 }
4240cfa1 605 } else {
9e850933 606 long overtime = time_spent - usec_cycle;
42b2b9fe 607
4240cfa1
JA
608 td->rate_pending_usleep -= overtime;
609 }
610}
611
5c24b2c4 612static int check_min_rate(struct thread_data *td, struct timeval *now)
4240cfa1 613{
7607bc6b 614 unsigned long spent;
4240cfa1
JA
615 unsigned long rate;
616
617 /*
618 * allow a 2 second settle period in the beginning
619 */
7607bc6b 620 if (mtime_since(&td->start, now) < 2000)
4240cfa1
JA
621 return 0;
622
623 /*
624 * if rate blocks is set, sample is running
625 */
49d2caab 626 if (td->rate_bytes) {
4240cfa1
JA
627 spent = mtime_since(&td->lastrate, now);
628 if (spent < td->ratecycle)
629 return 0;
630
49d2caab 631 rate = (td->this_io_bytes - td->rate_bytes) / spent;
4240cfa1
JA
632 if (rate < td->ratemin) {
633 printf("Client%d: min rate %d not met, got %ldKiB/sec\n", td->thread_number, td->ratemin, rate);
02bdd9ba 634 if (rate_quit)
e6402082 635 sig_handler(0);
4240cfa1
JA
636 return 1;
637 }
86184d14 638 }
4240cfa1 639
49d2caab 640 td->rate_bytes = td->this_io_bytes;
4240cfa1
JA
641 memcpy(&td->lastrate, now, sizeof(*now));
642 return 0;
86184d14
JA
643}
644
67903a2e
JA
645static inline int runtime_exceeded(struct thread_data *td, struct timeval *t)
646{
01f79976
JA
647 if (!td->timeout)
648 return 0;
67903a2e
JA
649 if (mtime_since(&td->start, t) >= td->timeout * 1000)
650 return 1;
651
652 return 0;
653}
654
e8457004
JA
655static void fill_random_bytes(struct thread_data *td,
656 unsigned char *p, unsigned int len)
657{
645785e5 658 unsigned int todo;
40ef7f64 659 double r;
e8457004
JA
660
661 while (len) {
40ef7f64 662 drand48_r(&td->verify_state, &r);
e8457004 663
40ef7f64
JA
664 /*
665 * lrand48_r seems to be broken and only fill the bottom
666 * 32-bits, even on 64-bit archs with 64-bit longs
667 */
668 todo = sizeof(r);
e8457004
JA
669 if (todo > len)
670 todo = len;
671
672 memcpy(p, &r, todo);
673
674 len -= todo;
675 p += todo;
676 }
677}
678
9d0c6ca2
JA
/* print 'len' bytes of buffer as two-digit lowercase hex, then a newline */
static void hexdump(void *buffer, int len)
{
	unsigned char *cur = buffer;
	int left;

	for (left = len; left > 0; left--)
		printf("%02x", *cur++);

	printf("\n");
}
688
645785e5 689static int verify_io_u(struct io_u *io_u)
e8457004
JA
690{
691 struct verify_header *hdr = (struct verify_header *) io_u->buf;
692 unsigned char *p = (unsigned char *) io_u->buf;
693 struct md5_ctx md5_ctx;
9d0c6ca2 694 int ret;
e8457004 695
840b216f 696 if (hdr->fio_magic != FIO_HDR_MAGIC)
e8457004
JA
697 return 1;
698
699 memset(&md5_ctx, 0, sizeof(md5_ctx));
700 p += sizeof(*hdr);
701 md5_update(&md5_ctx, p, hdr->len - sizeof(*hdr));
702
9d0c6ca2
JA
703 ret = memcmp(hdr->md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
704 if (ret) {
705 hexdump(hdr->md5_digest, sizeof(hdr->md5_digest));
706 hexdump(md5_ctx.hash, sizeof(md5_ctx.hash));
707 }
708
709 return ret;
e8457004
JA
710}
711
cfc702bd
JA
712/*
713 * fill body of io_u->buf with random data and add a header with the
714 * (eg) sha1sum of that data.
715 */
e8457004 716static void populate_io_u(struct thread_data *td, struct io_u *io_u)
cfc702bd 717{
e8457004
JA
718 struct md5_ctx md5_ctx;
719 struct verify_header hdr;
720 unsigned char *p = (unsigned char *) io_u->buf;
721
722 hdr.fio_magic = FIO_HDR_MAGIC;
723 hdr.len = io_u->buflen;
724 p += sizeof(hdr);
725 fill_random_bytes(td, p, io_u->buflen - sizeof(hdr));
726
727 memset(&md5_ctx, 0, sizeof(md5_ctx));
728 md5_update(&md5_ctx, p, io_u->buflen - sizeof(hdr));
729 memcpy(hdr.md5_digest, md5_ctx.hash, sizeof(md5_ctx.hash));
730 memcpy(io_u->buf, &hdr, sizeof(hdr));
cfc702bd
JA
731}
732
2c83567e
JA
733static void put_io_u(struct thread_data *td, struct io_u *io_u)
734{
735 list_del(&io_u->list);
736 list_add(&io_u->list, &td->io_u_freelist);
737 td->cur_depth--;
738}
739
f0f3411b
JA
740#define queue_full(td) (list_empty(&(td)->io_u_freelist))
741
e8457004
JA
742static struct io_u *__get_io_u(struct thread_data *td)
743{
744 struct io_u *io_u;
745
f0f3411b 746 if (queue_full(td))
e8457004
JA
747 return NULL;
748
749 io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
750 list_del(&io_u->list);
751 list_add(&io_u->list, &td->io_u_busylist);
f4bb2243 752 td->cur_depth++;
e8457004
JA
753 return io_u;
754}
755
2c83567e
JA
756static struct io_u *get_io_u(struct thread_data *td)
757{
758 struct io_u *io_u;
759
e8457004
JA
760 io_u = __get_io_u(td);
761 if (!io_u)
2c83567e
JA
762 return NULL;
763
406e7b7c
JA
764 if (get_next_offset(td, &io_u->offset)) {
765 put_io_u(td, io_u);
49d2caab 766 return NULL;
406e7b7c 767 }
49d2caab 768
b2a369fb
JA
769 io_u->buflen = get_next_buflen(td);
770 if (!io_u->buflen) {
e8457004 771 put_io_u(td, io_u);
7889f07b 772 return NULL;
e8457004 773 }
2c83567e 774
49d2caab
JA
775 if (io_u->buflen + io_u->offset > td->io_size)
776 io_u->buflen = td->io_size - io_u->offset;
777
778 if (!td->sequential)
779 mark_random_map(td, io_u);
780
781 td->last_bytes += io_u->buflen;
782
9d0c6ca2 783 if (td->verify)
e8457004 784 populate_io_u(td, io_u);
cfc702bd 785
2c83567e
JA
786 if (td->use_aio) {
787 if (td_read(td))
788 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
789 else
790 io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
791 }
792
57d753e3 793 gettimeofday(&io_u->start_time, NULL);
2c83567e
JA
794 return io_u;
795}
796
40ef7f64
JA
797static inline void td_set_runstate(struct thread_data *td, int runstate)
798{
799 td->old_runstate = td->runstate;
800 td->runstate = runstate;
801}
802
645785e5
JA
803static int get_next_verify(struct thread_data *td,
804 unsigned long long *offset, unsigned int *len)
805{
806 struct io_piece *ipo;
807
808 if (list_empty(&td->io_hist_list))
809 return 1;
810
811 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
812 list_del(&ipo->list);
813
814 *offset = ipo->offset;
815 *len = ipo->len;
816 free(ipo);
817 return 0;
818}
819
9d0c6ca2
JA
820static void prune_io_piece_log(struct thread_data *td)
821{
822 struct io_piece *ipo;
823
824 while (!list_empty(&td->io_hist_list)) {
825 ipo = list_entry(td->io_hist_list.next, struct io_piece, list);
826
827 list_del(&ipo->list);
828 free(ipo);
829 }
830}
831
9d0c6ca2
JA
832/*
833 * log a succesful write, so we can unwind the log for verify
834 */
835static void log_io_piece(struct thread_data *td, struct io_u *io_u)
836{
49d2caab 837 struct io_piece *ipo = malloc(sizeof(struct io_piece));
9d0c6ca2
JA
838 struct list_head *entry;
839
840 INIT_LIST_HEAD(&ipo->list);
841 ipo->offset = io_u->offset;
842 ipo->len = io_u->buflen;
843
49d2caab
JA
844 /*
845 * for random io where the writes extend the file, it will typically
846 * be laid out with the block scattered as written. it's faster to
847 * read them in in that order again, so don't sort
848 */
849 if (td->sequential || !td->overwrite) {
9d0c6ca2
JA
850 list_add_tail(&ipo->list, &td->io_hist_list);
851 return;
852 }
853
854 /*
855 * for random io, sort the list so verify will run faster
856 */
857 entry = &td->io_hist_list;
858 while ((entry = entry->prev) != &td->io_hist_list) {
859 struct io_piece *__ipo = list_entry(entry, struct io_piece, list);
860
9d0c6ca2
JA
861 if (__ipo->offset < ipo->offset)
862 break;
863 }
864
865 list_add(&ipo->list, entry);
866}
867
91fc5dc9 868static void do_sync_verify(struct thread_data *td)
cfc702bd 869{
40ef7f64 870 struct timeval t;
e8457004 871 struct io_u *io_u = NULL;
645785e5 872 int ret;
e8457004 873
40ef7f64 874 td_set_runstate(td, TD_VERIFYING);
e8457004
JA
875
876 io_u = __get_io_u(td);
877
40ef7f64 878 if (!td->odirect) {
49d2caab 879 if (fadvise(td->fd, td->file_offset, td->io_size, POSIX_FADV_DONTNEED) < 0) {
40ef7f64
JA
880 td->error = errno;
881 goto out;
882 }
883 }
884
e8457004
JA
885 do {
886 if (td->terminate)
887 break;
40ef7f64
JA
888
889 gettimeofday(&t, NULL);
890 if (runtime_exceeded(td, &t))
891 break;
892
645785e5
JA
893 if (get_next_verify(td, &io_u->offset, &io_u->buflen))
894 break;
895
896 if (td->cur_off != io_u->offset) {
897 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
898 td->error = errno;
899 break;
900 }
901 }
e8457004
JA
902
903 ret = read(td->fd, io_u->buf, io_u->buflen);
904 if (ret < (int) io_u->buflen) {
905 if (ret == -1) {
906 td->error = errno;
907 break;
908 } else if (!ret)
909 break;
910 else
911 io_u->buflen = ret;
912 }
913
645785e5 914 if (verify_io_u(io_u))
e8457004
JA
915 break;
916
645785e5 917 td->cur_off = io_u->offset + io_u->buflen;
e8457004
JA
918 } while (1);
919
920out:
40ef7f64 921 td_set_runstate(td, TD_RUNNING);
e8457004 922 put_io_u(td, io_u);
cfc702bd
JA
923}
924
43000118 925static void do_sync_io(struct thread_data *td)
892199bd 926{
7889f07b 927 unsigned long msec, usec;
e8457004 928 struct io_u *io_u = NULL;
2c83567e 929 struct timeval e;
892199bd 930
49d2caab 931 while (td->this_io_bytes < td->io_size) {
892199bd
JA
932 int ret;
933
934 if (td->terminate)
935 break;
936
2c83567e 937 io_u = get_io_u(td);
7889f07b
JA
938 if (!io_u)
939 break;
2c83567e 940
63a09e51
JA
941 if (td->cur_off != io_u->offset) {
942 if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
943 td->error = errno;
944 break;
945 }
892199bd
JA
946 }
947
02983297 948 if (td_read(td))
2c83567e 949 ret = read(td->fd, io_u->buf, io_u->buflen);
892199bd 950 else
2c83567e 951 ret = write(td->fd, io_u->buf, io_u->buflen);
892199bd 952
2c83567e 953 if (ret < (int) io_u->buflen) {
892199bd
JA
954 if (ret == -1)
955 td->error = errno;
956 break;
957 }
958
62bb4285 959 if (td_write(td))
645785e5
JA
960 log_io_piece(td, io_u);
961
4240cfa1 962 td->io_blocks++;
49d2caab
JA
963 td->io_bytes += io_u->buflen;
964 td->this_io_bytes += io_u->buflen;
63a09e51 965 td->cur_off = io_u->offset + io_u->buflen;
4240cfa1 966
86184d14
JA
967 gettimeofday(&e, NULL);
968
57d753e3 969 usec = utime_since(&io_u->start_time, &e);
86184d14 970
9e850933 971 rate_throttle(td, usec, io_u->buflen);
892199bd 972
4240cfa1
JA
973 if (check_min_rate(td, &e)) {
974 td->error = ENODATA;
975 break;
976 }
892199bd 977
4240cfa1 978 msec = usec / 1000;
57d753e3 979 add_clat_sample(td, msec);
645785e5 980 add_bw_sample(td);
67903a2e
JA
981
982 if (runtime_exceeded(td, &e))
983 break;
2c83567e 984
cdf92433 985 put_io_u(td, io_u);
e8457004 986 io_u = NULL;
cdf92433 987
e97712ed
JA
988 if (td->thinktime)
989 usec_sleep(td->thinktime);
990
cdf92433
JA
991 if (should_fsync(td) && td->fsync_blocks &&
992 (td->io_blocks % td->fsync_blocks) == 0)
993 fsync(td->fd);
892199bd
JA
994 }
995
e8457004
JA
996 if (io_u)
997 put_io_u(td, io_u);
998
4240cfa1 999 if (should_fsync(td))
892199bd 1000 fsync(td->fd);
892199bd 1001}
43000118 1002
1ad72b11
JA
1003static int io_u_getevents(struct thread_data *td, int min, int max,
1004 struct timespec *t)
1005{
1006 int r;
1007
1008 do {
1009 r = io_getevents(td->aio_ctx, min, max, td->aio_events, t);
1010 if (r != -EAGAIN && r != -EINTR)
1011 break;
1012 } while (1);
1013
1014 return r;
1015}
1016
2c83567e 1017static int io_u_queue(struct thread_data *td, struct io_u *io_u)
56b0eff0 1018{
2c83567e 1019 struct iocb *iocb = &io_u->iocb;
56b0eff0
JA
1020 int ret;
1021
1022 do {
254605cd 1023 ret = io_submit(td->aio_ctx, 1, &iocb);
56b0eff0
JA
1024 if (ret == 1)
1025 return 0;
406e7b7c 1026 else if (ret == -EAGAIN)
56b0eff0 1027 usleep(100);
406e7b7c 1028 else if (ret == -EINTR)
a592bd33 1029 continue;
56b0eff0
JA
1030 else
1031 break;
1032 } while (1);
1033
a592bd33 1034 return ret;
56b0eff0
JA
1035}
1036
/* value stored in an iocb's data field, as an unsigned long */
#define iocb_time(iocb)	((unsigned long) (iocb)->data)
/*
 * io unit behind a completion event. The expansion is fully
 * parenthesized so a member access applied to the macro result binds to
 * the io_u pointer and not to the inner expression.
 */
#define ev_to_iou(ev)	((struct io_u *) ((unsigned long) (ev)->obj))
1039
f0f3411b 1040static int ios_completed(struct thread_data *td, int nr)
2c83567e
JA
1041{
1042 unsigned long msec;
1043 struct io_u *io_u;
1044 struct timeval e;
9e850933 1045 int i, bytes_done;
2c83567e 1046
f0f3411b 1047 gettimeofday(&e, NULL);
2c83567e 1048
9e850933 1049 for (i = 0, bytes_done = 0; i < nr; i++) {
2c83567e
JA
1050 io_u = ev_to_iou(td->aio_events + i);
1051
f0f3411b 1052 td->io_blocks++;
49d2caab
JA
1053 td->io_bytes += io_u->buflen;
1054 td->this_io_bytes += io_u->buflen;
8c033f93 1055
f0f3411b 1056 msec = mtime_since(&io_u->issue_time, &e);
2c83567e 1057
f0f3411b 1058 add_clat_sample(td, msec);
645785e5
JA
1059 add_bw_sample(td);
1060
62bb4285 1061 if (td_write(td))
645785e5 1062 log_io_piece(td, io_u);
2c83567e 1063
f4bb2243 1064 bytes_done += io_u->buflen;
2c83567e
JA
1065 put_io_u(td, io_u);
1066 }
9e850933
JA
1067
1068 return bytes_done;
2c83567e
JA
1069}
1070
1071static void cleanup_pending_aio(struct thread_data *td)
1072{
1073 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
1074 struct list_head *entry, *n;
1075 struct io_u *io_u;
1076 int r;
1077
1078 /*
1079 * get immediately available events, if any
1080 */
1ad72b11 1081 r = io_u_getevents(td, 0, td->cur_depth, &ts);
2c83567e 1082 if (r > 0)
f0f3411b 1083 ios_completed(td, r);
2c83567e
JA
1084
1085 /*
1086 * now cancel remaining active events
1087 */
1088 list_for_each_safe(entry, n, &td->io_u_busylist) {
1089 io_u = list_entry(entry, struct io_u, list);
1090
1091 r = io_cancel(td->aio_ctx, &io_u->iocb, td->aio_events);
1092 if (!r)
1093 put_io_u(td, io_u);
1094 }
1095
1096 if (td->cur_depth) {
1ad72b11 1097 r = io_u_getevents(td, td->cur_depth, td->cur_depth, NULL);
2c83567e 1098 if (r > 0)
f0f3411b 1099 ios_completed(td, r);
2c83567e
JA
1100 }
1101}
98168d55 1102
d32d9284
JA
/*
 * Verify the pending io unit, if there is one, give it back to the free
 * list and clear *io_u. Returns the verify_io_u() result, or 0 when
 * nothing was pending.
 */
static int async_do_verify(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *pending = *io_u;
	int ret;

	if (!pending)
		return 0;

	ret = verify_io_u(pending);
	put_io_u(td, pending);
	*io_u = NULL;

	return ret;
}
1116
91fc5dc9 1117static void do_async_verify(struct thread_data *td)
cfc702bd 1118{
f4bb2243 1119 struct timeval t;
d32d9284 1120 struct io_u *io_u, *v_io_u = NULL;
645785e5 1121 int ret;
f4bb2243
JA
1122
1123 td_set_runstate(td, TD_VERIFYING);
1124
f4bb2243
JA
1125 do {
1126 if (td->terminate)
1127 break;
1128
1129 gettimeofday(&t, NULL);
1130 if (runtime_exceeded(td, &t))
1131 break;
1132
1133 io_u = __get_io_u(td);
1134 if (!io_u)
1135 break;
1136
645785e5
JA
1137 if (get_next_verify(td, &io_u->offset, &io_u->buflen)) {
1138 put_io_u(td, io_u);
1139 break;
f4bb2243
JA
1140 }
1141
1142 io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
1143 ret = io_u_queue(td, io_u);
1144 if (ret) {
1145 put_io_u(td, io_u);
1146 td->error = ret;
1147 break;
1148 }
1149
f0f3411b
JA
1150 /*
1151 * we have one pending to verify, do that while the next
1152 * we are doing io on the next one
1153 */
d32d9284
JA
1154 if (async_do_verify(td, &v_io_u))
1155 break;
f0f3411b 1156
1ad72b11 1157 ret = io_u_getevents(td, 1, 1, NULL);
f0f3411b
JA
1158 if (ret != 1) {
1159 if (ret < 0)
1160 td->error = ret;
f4bb2243
JA
1161 break;
1162 }
1163
f0f3411b 1164 v_io_u = ev_to_iou(td->aio_events);
f4bb2243 1165
645785e5 1166 td->cur_off = v_io_u->offset + v_io_u->buflen;
f0f3411b
JA
1167
1168 /*
d32d9284 1169 * if we can't submit more io, we need to verify now
f0f3411b 1170 */
d32d9284
JA
1171 if (queue_full(td) && async_do_verify(td, &v_io_u))
1172 break;
1173
f4bb2243
JA
1174 } while (1);
1175
d32d9284 1176 async_do_verify(td, &v_io_u);
f0f3411b 1177
f4bb2243
JA
1178 if (td->cur_depth)
1179 cleanup_pending_aio(td);
1180
1181 td_set_runstate(td, TD_RUNNING);
cfc702bd
JA
1182}
1183
43000118
JA
1184static void do_async_io(struct thread_data *td)
1185{
1186 struct timeval s, e;
7889f07b 1187 unsigned long usec;
43000118 1188
49d2caab 1189 while (td->this_io_bytes < td->io_size) {
43000118
JA
1190 struct timespec ts = { .tv_sec = 0, .tv_nsec = 0};
1191 struct timespec *timeout;
2c83567e
JA
1192 int ret, min_evts = 0;
1193 struct io_u *io_u;
9e850933 1194 unsigned int bytes_done;
43000118
JA
1195
1196 if (td->terminate)
1197 break;
1198
2c83567e 1199 io_u = get_io_u(td);
7889f07b
JA
1200 if (!io_u)
1201 break;
43000118 1202
57d753e3 1203 memcpy(&s, &io_u->start_time, sizeof(s));
8baf1bcc 1204
2c83567e 1205 ret = io_u_queue(td, io_u);
56b0eff0 1206 if (ret) {
a3fdb993 1207 put_io_u(td, io_u);
a592bd33 1208 td->error = ret;
43000118
JA
1209 break;
1210 }
1211
57d753e3
JA
1212 gettimeofday(&io_u->issue_time, NULL);
1213 add_slat_sample(td, mtime_since(&io_u->start_time, &io_u->issue_time));
2c83567e 1214 if (td->cur_depth < td->aio_depth) {
43000118
JA
1215 timeout = &ts;
1216 min_evts = 0;
1217 } else {
1218 timeout = NULL;
1219 min_evts = 1;
1220 }
1221
1ad72b11 1222 ret = io_u_getevents(td, min_evts, td->cur_depth, timeout);
43000118 1223 if (ret < 0) {
406e7b7c 1224 td->error = ret;
43000118
JA
1225 break;
1226 } else if (!ret)
1227 continue;
1228
f0f3411b 1229 bytes_done = ios_completed(td, ret);
43000118 1230
98168d55
JA
1231 /*
1232 * the rate is batched for now, it should work for batches
1233 * of completions except the very first one which may look
1234 * a little bursty
1235 */
2c83567e 1236 gettimeofday(&e, NULL);
43000118
JA
1237 usec = utime_since(&s, &e);
1238
9e850933 1239 rate_throttle(td, usec, bytes_done);
43000118
JA
1240
1241 if (check_min_rate(td, &e)) {
1242 td->error = ENODATA;
1243 break;
1244 }
67903a2e
JA
1245
1246 if (runtime_exceeded(td, &e))
1247 break;
765d9223
JA
1248
1249 if (td->thinktime)
1250 usec_sleep(td->thinktime);
cdf92433
JA
1251
1252 if (should_fsync(td) && td->fsync_blocks &&
1253 (td->io_blocks % td->fsync_blocks) == 0)
1254 fsync(td->fd);
43000118 1255 }
56b0eff0 1256
2c83567e
JA
1257 if (td->cur_depth)
1258 cleanup_pending_aio(td);
4ac89145
JA
1259
1260 if (should_fsync(td))
1261 fsync(td->fd);
56b0eff0
JA
1262}
1263
1264static void cleanup_aio(struct thread_data *td)
1265{
254605cd
JA
1266 io_destroy(td->aio_ctx);
1267
43000118
JA
1268 if (td->aio_events)
1269 free(td->aio_events);
43000118
JA
1270}
1271
1272static int init_aio(struct thread_data *td)
1273{
254605cd 1274 if (io_queue_init(td->aio_depth, &td->aio_ctx)) {
43000118
JA
1275 td->error = errno;
1276 return 1;
1277 }
1278
43000118 1279 td->aio_events = malloc(td->aio_depth * sizeof(struct io_event));
43000118
JA
1280 return 0;
1281}
1282
2c83567e
JA
1283static void cleanup_io_u(struct thread_data *td)
1284{
1285 struct list_head *entry, *n;
1286 struct io_u *io_u;
1287
1288 list_for_each_safe(entry, n, &td->io_u_freelist) {
1289 io_u = list_entry(entry, struct io_u, list);
1290
1291 list_del(&io_u->list);
2c83567e
JA
1292 free(io_u);
1293 }
6b71c826 1294
99c6704f
JA
1295 if (td->mem_type == MEM_MALLOC)
1296 free(td->orig_buffer);
1297 else if (td->mem_type == MEM_SHM) {
1298 struct shmid_ds sbuf;
1299
1300 shmdt(td->orig_buffer);
1301 shmctl(td->shm_id, IPC_RMID, &sbuf);
1302 }
2c83567e
JA
1303}
1304
99c6704f 1305static int init_io_u(struct thread_data *td)
2c83567e
JA
1306{
1307 struct io_u *io_u;
99c6704f 1308 int i, max_units, mem_size;
6b71c826 1309 char *p;
2c83567e
JA
1310
1311 if (!td->use_aio)
1312 max_units = 1;
1313 else
1314 max_units = td->aio_depth;
1315
7889f07b 1316 mem_size = td->max_bs * max_units + MASK;
99c6704f
JA
1317
1318 if (td->mem_type == MEM_MALLOC)
1319 td->orig_buffer = malloc(mem_size);
1320 else if (td->mem_type == MEM_SHM) {
1321 td->shm_id = shmget(IPC_PRIVATE, mem_size, IPC_CREAT | 0600);
1322 if (td->shm_id < 0) {
1323 td->error = errno;
1324 perror("shmget");
1325 return 1;
1326 }
1327
1328 td->orig_buffer = shmat(td->shm_id, NULL, 0);
1329 if (td->orig_buffer == (void *) -1) {
1330 td->error = errno;
1331 perror("shmat");
1332 return 1;
1333 }
1334 }
6b71c826 1335
2c83567e
JA
1336 INIT_LIST_HEAD(&td->io_u_freelist);
1337 INIT_LIST_HEAD(&td->io_u_busylist);
645785e5 1338 INIT_LIST_HEAD(&td->io_hist_list);
2c83567e 1339
99c6704f 1340 p = ALIGN(td->orig_buffer);
2c83567e
JA
1341 for (i = 0; i < max_units; i++) {
1342 io_u = malloc(sizeof(*io_u));
1343 memset(io_u, 0, sizeof(*io_u));
1344 INIT_LIST_HEAD(&io_u->list);
1345
7889f07b 1346 io_u->buf = p + td->max_bs * i;
2c83567e
JA
1347 list_add(&io_u->list, &td->io_u_freelist);
1348 }
99c6704f
JA
1349
1350 return 0;
2c83567e
JA
1351}
1352
a0a9b35b
JA
1353static void setup_log(struct io_log **log)
1354{
1355 struct io_log *l = malloc(sizeof(*l));
1356
1357 l->nr_samples = 0;
1358 l->max_samples = 1024;
1359 l->log = malloc(l->max_samples * sizeof(struct io_sample));
1360 *log = l;
1361}
1362
1363static void finish_log(struct thread_data *td, struct io_log *log, char *name)
1364{
1365 char file_name[128];
1366 FILE *f;
645785e5 1367 unsigned int i;
a0a9b35b
JA
1368
1369 sprintf(file_name, "client%d_%s.log", td->thread_number, name);
1370 f = fopen(file_name, "w");
1371 if (!f) {
1372 perror("fopen log");
1373 return;
1374 }
1375
1376 for (i = 0; i < log->nr_samples; i++)
1377 fprintf(f, "%lu, %lu\n", log->log[i].time, log->log[i].val);
1378
1379 fclose(f);
1380 free(log->log);
1381 free(log);
1382}
1383
02983297
JA
1384static int create_file(struct thread_data *td)
1385{
7889f07b 1386 unsigned long long left;
645785e5 1387 unsigned int bs;
02983297 1388 char *b;
645785e5 1389 int r;
02983297 1390
02983297
JA
1391 /*
1392 * unless specifically asked for overwrite, let normal io extend it
1393 */
62bb4285 1394 if (td_write(td) && !td->overwrite)
02983297
JA
1395 return 0;
1396
57d753e3
JA
1397 if (!td->file_size) {
1398 fprintf(stderr, "Need size for create\n");
1399 td->error = EINVAL;
1400 return 1;
1401 }
1402
42fd89a7
JA
1403 printf("Client%d: Laying out IO file\n", td->thread_number);
1404
02983297
JA
1405 td->fd = open(td->file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1406 if (td->fd < 0) {
1407 td->error = errno;
1408 return 1;
1409 }
1410
c94deb1c
JA
1411 if (ftruncate(td->fd, td->file_size) == -1) {
1412 td->error = errno;
1413 return 1;
1414 }
1415
49d2caab 1416 td->io_size = td->file_size;
7889f07b
JA
1417 b = malloc(td->max_bs);
1418 memset(b, 0, td->max_bs);
1419
1420 left = td->file_size;
1421 while (left) {
1422 bs = td->max_bs;
1423 if (bs > left)
1424 bs = left;
02983297 1425
7889f07b 1426 r = write(td->fd, b, bs);
02983297 1427
645785e5 1428 if (r == (int) bs) {
7889f07b 1429 left -= bs;
02983297 1430 continue;
7889f07b 1431 } else {
02983297
JA
1432 if (r < 0)
1433 td->error = errno;
1434 else
1435 td->error = EIO;
1436
1437 break;
1438 }
1439 }
1440
fc097bfe
JA
1441 if (td->create_fsync)
1442 fsync(td->fd);
1443
02983297
JA
1444 close(td->fd);
1445 td->fd = -1;
1446 free(b);
1447 return 0;
1448}
1449
1450static int file_exists(struct thread_data *td)
1451{
1452 struct stat st;
1453
1454 if (stat(td->file_name, &st) != -1)
1455 return 1;
1456
1457 return errno != ENOENT;
1458}
1459
c94deb1c 1460static int get_file_size(struct thread_data *td)
02983297 1461{
c94deb1c 1462 size_t bytes = 0;
02983297 1463 struct stat st;
c94deb1c
JA
1464
1465 if (fstat(td->fd, &st) == -1) {
1466 td->error = errno;
1467 return 1;
1468 }
1469
1470 /*
1471 * if block device, get size via BLKGETSIZE64 ioctl. try that as well
1472 * if this is a link, fall back to st.st_size if it fails
1473 */
1474 if (S_ISBLK(st.st_mode) || S_ISLNK(st.st_mode)) {
1475 if (ioctl(td->fd, BLKGETSIZE64, &bytes)) {
1476 if (S_ISBLK(st.st_mode)) {
1477 td->error = errno;
1478 return 1;
1479 } else
1480 bytes = st.st_size;
1481 }
1482 } else
1483 bytes = st.st_size;
1484
1485 if (td_read(td)) {
1486 if (td->file_size > bytes)
1487 bytes = td->file_size;
1488 } else {
1489 if (!td->file_size)
1490 td->file_size = 1024 * 1024 * 1024;
1491
1492 bytes = td->file_size;
1493 }
1494
1495 if (td->file_offset > bytes) {
1496 fprintf(stderr, "Client%d: offset larger than length\n", td->thread_number);
1497 return 1;
1498 }
1499
1500 td->io_size = bytes - td->file_offset;
1501 if (td->io_size == 0) {
1502 fprintf(stderr, "Client%d: no io blocks\n", td->thread_number);
1503 td->error = EINVAL;
1504 return 1;
1505 }
1506
1507 return 0;
1508}
1509
1510static int setup_file(struct thread_data *td)
1511{
02983297
JA
1512 int flags = 0;
1513
1514 if (!file_exists(td)) {
1515 if (!td->create_file) {
1516 td->error = ENOENT;
1517 return 1;
1518 }
1519 if (create_file(td))
1520 return 1;
1521 }
1522
1523 if (td->odirect)
1524 flags |= O_DIRECT;
1525
1526 if (td_read(td))
1527 td->fd = open(td->file_name, flags | O_RDONLY);
1528 else {
1529 if (!td->overwrite)
1530 flags |= O_TRUNC;
74b4b5fb
JA
1531 if (td->sync_io)
1532 flags |= O_SYNC;
e8457004
JA
1533 if (td->verify)
1534 flags |= O_RDWR;
1535 else
1536 flags |= O_WRONLY;
02983297 1537
e8457004 1538 td->fd = open(td->file_name, flags | O_CREAT, 0600);
02983297
JA
1539 }
1540
1541 if (td->fd == -1) {
1542 td->error = errno;
1543 return 1;
1544 }
1545
c94deb1c 1546 if (get_file_size(td))
49d2caab 1547 return 1;
49d2caab 1548
62bb4285 1549 if (td_write(td) && ftruncate(td->fd, td->file_size) == -1) {
c94deb1c 1550 td->error = errno;
02983297
JA
1551 return 1;
1552 }
1553
b95799ca 1554 if (td->invalidate_cache) {
c94deb1c 1555 if (fadvise(td->fd, td->file_offset, td->file_size, POSIX_FADV_DONTNEED) < 0) {
b95799ca
JA
1556 td->error = errno;
1557 return 1;
1558 }
1559 }
1560
02983297
JA
1561 return 0;
1562}
1563
d32d9284
JA
1564static void clear_io_state(struct thread_data *td)
1565{
9d0c6ca2
JA
1566 if (!td->use_aio)
1567 lseek(td->fd, SEEK_SET, 0);
1568
d32d9284 1569 td->cur_off = 0;
49d2caab
JA
1570 td->last_bytes = 0;
1571 td->stat_io_bytes = 0;
1572 td->this_io_bytes = 0;
1573
1574 if (td->file_map)
1575 memset(td->file_map, 0, td->num_maps * sizeof(long));
d32d9284
JA
1576}
1577
5c24b2c4 1578static void *thread_main(int shm_id, int offset, char *argv[])
892199bd
JA
1579{
1580 struct thread_data *td;
02983297 1581 int ret = 1;
2c83567e 1582 void *data;
892199bd 1583
7292613b
JA
1584 setsid();
1585
892199bd 1586 data = shmat(shm_id, NULL, 0);
4ac89145
JA
1587 if (data == (void *) -1) {
1588 perror("shmat");
1589 return NULL;
1590 }
1591
892199bd
JA
1592 td = data + offset * sizeof(struct thread_data);
1593 td->pid = getpid();
1594
99c6704f
JA
1595 if (init_io_u(td))
1596 goto err;
2c83567e 1597
18e0b78c
JA
1598 if (sched_setaffinity(td->pid, sizeof(td->cpumask), &td->cpumask) == -1) {
1599 td->error = errno;
1600 goto err;
1601 }
1602
4240cfa1 1603 sprintf(argv[0], "fio%d", offset);
892199bd 1604
43000118
JA
1605 if (td->use_aio && init_aio(td))
1606 goto err;
1607
f737299d 1608 if (td->ioprio) {
892199bd
JA
1609 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, td->ioprio) == -1) {
1610 td->error = errno;
599002b3 1611 goto err;
892199bd
JA
1612 }
1613 }
1614
1615 sem_post(&startup_sem);
1616 sem_wait(&td->mutex);
43000118 1617
fc097bfe
JA
1618 if (!td->create_serialize && setup_file(td))
1619 goto err;
1620
49d2caab
JA
1621 if (init_random_state(td))
1622 goto err;
1623
7292613b
JA
1624 gettimeofday(&td->start, NULL);
1625
92b229ed
JA
1626 getrusage(RUSAGE_SELF, &td->ru_start);
1627
293753bb
JA
1628 while (td->loops--) {
1629 gettimeofday(&td->stat_sample_time, NULL);
1630
1631 if (td->ratemin)
1632 memcpy(&td->lastrate, &td->stat_sample_time, sizeof(td->lastrate));
7292613b 1633
d32d9284 1634 clear_io_state(td);
9d0c6ca2 1635 prune_io_piece_log(td);
fd1ae4c9 1636
b2de0ed2 1637 if (!td->use_aio)
b6794fbf 1638 do_sync_io(td);
b2de0ed2
JA
1639 else
1640 do_async_io(td);
1641
91fc5dc9
JA
1642 if (td->error)
1643 break;
1644
b2de0ed2
JA
1645 if (!td->verify)
1646 continue;
cfc702bd 1647
b2de0ed2 1648 clear_io_state(td);
d32d9284 1649
91fc5dc9
JA
1650 if (!td->use_aio)
1651 do_sync_verify(td);
1652 else
1653 do_async_verify(td);
1654
1655 if (td->error)
1656 break;
b6794fbf 1657 }
7292613b 1658
be33abe4 1659 td->runtime = mtime_since_now(&td->start);
92b229ed 1660 getrusage(RUSAGE_SELF, &td->ru_end);
892199bd 1661 ret = 0;
a0a9b35b
JA
1662
1663 if (td->bw_log)
1664 finish_log(td, td->bw_log, "bw");
1665 if (td->lat_log)
1666 finish_log(td, td->lat_log, "lat");
4ac89145 1667
98dd52d6
JA
1668 if (exitall_on_terminate)
1669 sig_handler(0);
1670
892199bd 1671err:
7292613b
JA
1672 if (td->fd != -1) {
1673 close(td->fd);
1674 td->fd = -1;
1675 }
4ac89145
JA
1676 if (td->use_aio)
1677 cleanup_aio(td);
2c83567e 1678 cleanup_io_u(td);
599002b3 1679 if (ret) {
892199bd 1680 sem_post(&startup_sem);
599002b3
JA
1681 sem_wait(&td->mutex);
1682 }
40ef7f64 1683 td_set_runstate(td, TD_EXITED);
4240cfa1 1684 shmdt(data);
892199bd
JA
1685 return NULL;
1686}
1687
5c24b2c4 1688static void free_shm(void)
892199bd 1689{
c269123b
JA
1690 struct shmid_ds sbuf;
1691
1692 if (threads) {
1693 shmdt(threads);
1694 threads = NULL;
1695 shmctl(shm_id, IPC_RMID, &sbuf);
1696 }
892199bd
JA
1697}
1698
57d753e3
JA
1699static int calc_lat(struct io_stat *is, unsigned long *min, unsigned long *max,
1700 double *mean, double *dev)
1701{
1702 double n;
1703
1704 if (is->samples == 0)
1705 return 0;
1706
1707 *min = is->min_val;
1708 *max = is->max_val;
1709
1710 n = (double) is->samples;
1711 *mean = (double) is->val / n;
1712 *dev = sqrt(((double) is->val_sq - (*mean * *mean) / n) / (n - 1));
1713 return 1;
1714}
1715
5c24b2c4 1716static void show_thread_status(struct thread_data *td)
892199bd
JA
1717{
1718 int prio, prio_class;
92b229ed
JA
1719 unsigned long min, max, bw = 0, ctx;
1720 double mean, dev, usr_cpu, sys_cpu;
892199bd 1721
49d2caab 1722 if (!td->io_bytes && !td->error)
213b446c
JA
1723 return;
1724
892199bd 1725 if (td->runtime)
49d2caab 1726 bw = td->io_bytes / td->runtime;
892199bd
JA
1727
1728 prio = td->ioprio & 0xff;
1729 prio_class = td->ioprio >> IOPRIO_CLASS_SHIFT;
1730
49d2caab 1731 printf("Client%d: err=%2d, io=%6luMiB, bw=%6luKiB/s, runt=%6lumsec\n", td->thread_number, td->error, td->io_bytes >> 20, bw, td->runtime);
fd1ae4c9 1732
57d753e3
JA
1733 if (calc_lat(&td->slat_stat, &min, &max, &mean, &dev))
1734 printf(" slat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1735 if (calc_lat(&td->clat_stat, &min, &max, &mean, &dev))
1736 printf(" clat (msec): min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
1737 if (calc_lat(&td->bw_stat, &min, &max, &mean, &dev))
1738 printf(" bw (KiB/s) : min=%5lu, max=%5lu, avg=%5.02f, dev=%5.02f\n", min, max, mean, dev);
92b229ed
JA
1739
1740 if (td->runtime) {
1741 unsigned long t;
1742
1743 t = mtime_since(&td->ru_start.ru_utime, &td->ru_end.ru_utime);
1744 usr_cpu = (double) t * 100 / (double) td->runtime;
1745
1746 t = mtime_since(&td->ru_start.ru_stime, &td->ru_end.ru_stime);
1747 sys_cpu = (double) t * 100 / (double) td->runtime;
1748 } else {
1749 usr_cpu = 0;
1750 sys_cpu = 0;
1751 }
1752
1753 ctx = td->ru_end.ru_nvcsw + td->ru_end.ru_nivcsw - (td->ru_start.ru_nvcsw + td->ru_start.ru_nivcsw);
1754
1755 printf(" cpu : usr=%3.2f%%, sys=%3.2f%%, ctx=%lu\n", usr_cpu, sys_cpu, ctx);
892199bd
JA
1756}
1757
5c24b2c4 1758static int setup_rate(struct thread_data *td)
86184d14 1759{
4240cfa1
JA
1760 int nr_reads_per_sec;
1761
1762 if (!td->rate)
1763 return 0;
1764
1765 if (td->rate < td->ratemin) {
1766 fprintf(stderr, "min rate larger than nominal rate\n");
1767 return -1;
1768 }
86184d14 1769
49d2caab 1770 nr_reads_per_sec = (td->rate * 1024) / td->min_bs;
86184d14
JA
1771 td->rate_usec_cycle = 1000000 / nr_reads_per_sec;
1772 td->rate_pending_usleep = 0;
4240cfa1 1773 return 0;
86184d14
JA
1774}
1775
47d45203 1776static struct thread_data *get_new_job(int global)
892199bd 1777{
4240cfa1
JA
1778 struct thread_data *td;
1779
47d45203
JA
1780 if (global)
1781 return &def_thread;
8867c0a8 1782 if (thread_number >= max_jobs)
4240cfa1
JA
1783 return NULL;
1784
1785 td = &threads[thread_number++];
fc24389f 1786 memset(td, 0, sizeof(*td));
892199bd 1787
e4ed35c3 1788 td->fd = -1;
86184d14 1789 td->thread_number = thread_number;
76cb7b42 1790
47d45203 1791 td->ddir = def_thread.ddir;
76cb7b42
JA
1792 td->ioprio = def_thread.ioprio;
1793 td->sequential = def_thread.sequential;
47d45203 1794 td->bs = def_thread.bs;
8c033f93
JA
1795 td->min_bs = def_thread.min_bs;
1796 td->max_bs = def_thread.max_bs;
47d45203 1797 td->odirect = def_thread.odirect;
e97712ed 1798 td->thinktime = def_thread.thinktime;
76cb7b42
JA
1799 td->fsync_blocks = def_thread.fsync_blocks;
1800 td->start_delay = def_thread.start_delay;
67903a2e 1801 td->timeout = def_thread.timeout;
76cb7b42 1802 td->use_aio = def_thread.use_aio;
02983297
JA
1803 td->create_file = def_thread.create_file;
1804 td->overwrite = def_thread.overwrite;
b95799ca 1805 td->invalidate_cache = def_thread.invalidate_cache;
76cb7b42 1806 td->file_size = def_thread.file_size;
9b5cf6c0 1807 td->file_offset = def_thread.file_offset;
76cb7b42
JA
1808 td->rate = def_thread.rate;
1809 td->ratemin = def_thread.ratemin;
1810 td->ratecycle = def_thread.ratecycle;
1811 td->aio_depth = def_thread.aio_depth;
99c6704f
JA
1812 td->sync_io = def_thread.sync_io;
1813 td->mem_type = def_thread.mem_type;
1d035750 1814 td->bw_avg_time = def_thread.bw_avg_time;
fc097bfe
JA
1815 td->create_serialize = def_thread.create_serialize;
1816 td->create_fsync = def_thread.create_fsync;
b6794fbf 1817 td->loops = def_thread.loops;
cfc702bd 1818 td->verify = def_thread.verify;
2a81240d 1819 td->stonewall = def_thread.stonewall;
47d45203 1820 memcpy(&td->cpumask, &def_thread.cpumask, sizeof(td->cpumask));
f737299d
JA
1821
1822 return td;
1823}
1824
4240cfa1
JA
1825static void put_job(struct thread_data *td)
1826{
1827 memset(&threads[td->thread_number - 1], 0, sizeof(*td));
1828 thread_number--;
1829}
1830
5c24b2c4
JA
1831static int add_job(struct thread_data *td, const char *filename, int prioclass,
1832 int prio)
f737299d 1833{
47d45203
JA
1834 if (td == &def_thread)
1835 return 0;
1836
f737299d 1837 strcpy(td->file_name, filename);
4240cfa1 1838 sem_init(&td->mutex, 1, 0);
f737299d
JA
1839 td->ioprio = (prioclass << IOPRIO_CLASS_SHIFT) | prio;
1840
57d753e3
JA
1841 td->clat_stat.min_val = ULONG_MAX;
1842 td->slat_stat.min_val = ULONG_MAX;
1843 td->bw_stat.min_val = ULONG_MAX;
1844
8dbff0b1
JA
1845 run_str[td->thread_number - 1] = 'P';
1846
4ac89145
JA
1847 if (td->use_aio && !td->aio_depth)
1848 td->aio_depth = 1;
43000118 1849
645785e5 1850 if (td->min_bs == -1U)
8bdcfab5 1851 td->min_bs = td->bs;
645785e5 1852 if (td->max_bs == -1U)
8bdcfab5 1853 td->max_bs = td->bs;
645785e5 1854 if (td_read(td))
840b216f 1855 td->verify = 0;
8bdcfab5 1856
4240cfa1
JA
1857 if (setup_rate(td))
1858 return -1;
f737299d 1859
a0a9b35b
JA
1860 if (write_lat_log)
1861 setup_log(&td->lat_log);
1862 if (write_bw_log)
1863 setup_log(&td->bw_log);
1864
7889f07b 1865 printf("Client%d: file=%s, rw=%d, prio=%d/%d, seq=%d, odir=%d, bs=%d-%d, rate=%d, aio=%d, aio_depth=%d\n", td->thread_number, filename, td->ddir, prioclass, prio, td->sequential, td->odirect, td->min_bs, td->max_bs, td->rate, td->use_aio, td->aio_depth);
4240cfa1 1866 return 0;
892199bd
JA
1867}
1868
18e0b78c
JA
/*
 * Build a cpu set from the bits of 'cpu': bit i set means cpu i is
 * allowed.
 *
 * NOTE(review): 'cpumask' is passed by value, so the set built here is a
 * local copy and the caller's mask is not modified. Fixing that means
 * changing the signature to take a pointer, together with the caller.
 */
static void fill_cpu_mask(cpu_set_t cpumask, int cpu)
{
	unsigned int i;

	CPU_ZERO(&cpumask);

	for (i = 0; i < sizeof(int) * 8; i++) {
		/* unsigned shift: 1 << 31 on a signed int is undefined */
		if ((1U << i) & (unsigned int) cpu)
			CPU_SET(i, &cpumask);
	}
}
1880
7889f07b
JA
/*
 * Map a size suffix to its multiplier: k/K = 1024, m/M = 1024^2,
 * g/G = 1024^3. Any other character yields 1.
 */
unsigned long get_mult(char c)
{
	unsigned int shift = 0;

	if (c == 'k' || c == 'K')
		shift = 10;
	else if (c == 'm' || c == 'M')
		shift = 20;
	else if (c == 'g' || c == 'G')
		shift = 30;

	return 1UL << shift;
}
1897
02983297
JA
/*
 * convert string after '=' into decimal value, noting any size suffix
 *
 * The character right after the digits is taken as the suffix (see
 * get_mult()), so the line needs neither a trailing newline nor a
 * minimum length. *val is only written on success.
 *
 * Returns 0 on success, 1 if there is no '=', no digits follow it, or
 * the number is out of range.
 */
static int str_cnv(char *p, unsigned long long *val)
{
	unsigned long long num;
	char *str, *end;

	str = strchr(p, '=');
	if (!str)
		return 1;

	str++;

	/* errno must be cleared to tell a real ERANGE from a stale one */
	errno = 0;
	num = strtoull(str, &end, 10);
	if (end == str)
		return 1;
	if (num == ULLONG_MAX && errno == ERANGE)
		return 1;

	*val = num * get_mult(*end);
	return 0;
}
1920
02983297
JA
/*
 * If 'name' occurs anywhere in the line, convert the value after '='
 * into *val using str_cnv(). Returns 0 when a value was converted, 1
 * when the name is absent or the conversion failed.
 */
static int check_strcnv(char *p, char *name, unsigned long long *val)
{
	if (strstr(p, name) == NULL)
		return 1;

	return str_cnv(p, val);
}
1928
99c6704f
JA
/*
 * Returns 0 if 'name' occurs in the line and 'option' occurs somewhere
 * after it, 1 otherwise.
 */
static int check_str(char *p, char *name, char *option)
{
	char *found = strstr(p, name);

	if (found == NULL)
		return 1;

	/* only look at what follows the name */
	return strstr(found + strlen(name), option) == NULL;
}
1942
7889f07b
JA
/*
 * Parse "<name>=<start>-<end>" from the start of the line, with or
 * without blanks around '='. The forms with a one-character suffix after
 * each number are tried first, and the numbers are then scaled by
 * get_mult(); the plain numeric forms are tried last.
 *
 * Returns 0 with *s and *e filled in on a match, 1 if nothing matched.
 * *s and *e may have been written to even when 1 is returned.
 */
static int check_range(char *p, char *name, unsigned long *s, unsigned long *e)
{
	static const char *const with_suffix[] = {
		"%s=%%lu%%c-%%lu%%c",
		"%s = %%lu%%c-%%lu%%c",
	};
	static const char *const plain[] = {
		"%s=%%lu-%%lu",
		"%s = %%lu-%%lu",
	};
	char fmt[128];
	char sfx_start, sfx_end;
	unsigned int i;

	for (i = 0; i < sizeof(with_suffix) / sizeof(with_suffix[0]); i++) {
		sprintf(fmt, with_suffix[i], name);
		if (sscanf(p, fmt, s, &sfx_start, e, &sfx_end) != 4)
			continue;

		*s *= get_mult(sfx_start);
		*e *= get_mult(sfx_end);
		return 0;
	}

	for (i = 0; i < sizeof(plain) / sizeof(plain[0]); i++) {
		sprintf(fmt, plain[i], name);
		if (sscanf(p, fmt, s, e) == 2)
			return 0;
	}

	return 1;
}
1973
/*
 * Parse "<name>=<number>" or "<name> = <number>" from the start of the
 * line into *val. Returns 0 on a match, 1 otherwise (*val untouched).
 */
static int check_int(char *p, char *name, unsigned int *val)
{
	char str[128];

	/* %u, not %d: the target is an unsigned int */
	snprintf(str, sizeof(str), "%s=%%u", name);
	if (sscanf(p, str, val) == 1)
		return 0;

	snprintf(str, sizeof(str), "%s = %%u", name);
	if (sscanf(p, str, val) == 1)
		return 0;

	return 1;
}
1988
/*
 * Returns 1 if the line holds nothing but whitespace / control
 * characters, or if the first other character is ';' (a comment).
 * Returns 0 as soon as any other character is seen first.
 */
static int is_empty_or_comment(char *line)
{
	const unsigned char *c;

	/*
	 * walk to the terminator instead of calling strlen() every round;
	 * unsigned char keeps the ctype calls defined for bytes >= 0x80
	 */
	for (c = (const unsigned char *) line; *c; c++) {
		if (*c == ';')
			return 1;
		if (!isspace(*c) && !iscntrl(*c))
			return 0;
	}

	return 1;
}
2002
5c24b2c4 2003static int parse_jobs_ini(char *file)
7dd1389e 2004{
47d45203 2005 unsigned int prioclass, prio, cpu, global;
8c033f93 2006 unsigned long long ull;
7889f07b 2007 unsigned long ul1, ul2;
f737299d 2008 struct thread_data *td;
7dd1389e
JA
2009 char *string, *name;
2010 fpos_t off;
2011 FILE *f;
2012 char *p;
2013
2014 f = fopen(file, "r");
2015 if (!f) {
2016 perror("fopen");
4240cfa1 2017 return 1;
7dd1389e
JA
2018 }
2019
2020 string = malloc(4096);
2021 name = malloc(256);
2022
7dd1389e 2023 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b
JA
2024 if (is_empty_or_comment(p))
2025 continue;
7dd1389e
JA
2026 if (sscanf(p, "[%s]", name) != 1)
2027 continue;
2028
47d45203
JA
2029 global = !strncmp(name, "global", 6);
2030
7dd1389e
JA
2031 name[strlen(name) - 1] = '\0';
2032
47d45203 2033 td = get_new_job(global);
4240cfa1
JA
2034 if (!td)
2035 break;
f737299d 2036
7dd1389e 2037 prioclass = 2;
f737299d 2038 prio = 4;
7dd1389e
JA
2039
2040 fgetpos(f, &off);
2041 while ((p = fgets(string, 4096, f)) != NULL) {
7292613b 2042 if (is_empty_or_comment(p))
e6402082
JA
2043 continue;
2044 if (strstr(p, "["))
7dd1389e 2045 break;
f737299d 2046 if (!check_int(p, "rw", &td->ddir)) {
7dd1389e
JA
2047 fgetpos(f, &off);
2048 continue;
2049 }
2050 if (!check_int(p, "prio", &prio)) {
2051 fgetpos(f, &off);
2052 continue;
2053 }
2054 if (!check_int(p, "prioclass", &prioclass)) {
2055 fgetpos(f, &off);
2056 continue;
2057 }
f737299d 2058 if (!check_int(p, "direct", &td->odirect)) {
7dd1389e
JA
2059 fgetpos(f, &off);
2060 continue;
2061 }
f737299d 2062 if (!check_int(p, "rate", &td->rate)) {
7dd1389e
JA
2063 fgetpos(f, &off);
2064 continue;
2065 }
4240cfa1
JA
2066 if (!check_int(p, "ratemin", &td->ratemin)) {
2067 fgetpos(f, &off);
2068 continue;
2069 }
2070 if (!check_int(p, "ratecycle", &td->ratecycle)) {
2071 fgetpos(f, &off);
2072 continue;
2073 }
e97712ed 2074 if (!check_int(p, "thinktime", &td->thinktime)) {
7dd1389e
JA
2075 fgetpos(f, &off);
2076 continue;
2077 }
18e0b78c 2078 if (!check_int(p, "cpumask", &cpu)) {
f737299d 2079 fill_cpu_mask(td->cpumask, cpu);
18e0b78c
JA
2080 fgetpos(f, &off);
2081 continue;
2082 }
4240cfa1
JA
2083 if (!check_int(p, "fsync", &td->fsync_blocks)) {
2084 fgetpos(f, &off);
2085 continue;
2086 }
fc24389f
JA
2087 if (!check_int(p, "startdelay", &td->start_delay)) {
2088 fgetpos(f, &off);
2089 continue;
2090 }
67903a2e
JA
2091 if (!check_int(p, "timeout", &td->timeout)) {
2092 fgetpos(f, &off);
2093 continue;
2094 }
b95799ca
JA
2095 if (!check_int(p, "invalidate",&td->invalidate_cache)) {
2096 fgetpos(f, &off);
2097 continue;
2098 }
43000118
JA
2099 if (!check_int(p, "aio_depth", &td->aio_depth)) {
2100 fgetpos(f, &off);
2101 continue;
2102 }
74b4b5fb
JA
2103 if (!check_int(p, "sync", &td->sync_io)) {
2104 fgetpos(f, &off);
2105 continue;
2106 }
1d035750
JA
2107 if (!check_int(p, "bwavgtime", &td->bw_avg_time)) {
2108 fgetpos(f, &off);
2109 continue;
2110 }
fc097bfe
JA
2111 if (!check_int(p, "create_serialize", &td->create_serialize)) {
2112 fgetpos(f, &off);
2113 continue;
2114 }
2115 if (!check_int(p, "create_fsync", &td->create_fsync)) {
2116 fgetpos(f, &off);
2117 continue;
2118 }
b6794fbf
JA
2119 if (!check_int(p, "loops", &td->loops)) {
2120 fgetpos(f, &off);
2121 continue;
2122 }
e8457004
JA
2123 if (!check_int(p, "verify", &td->verify)) {
2124 fgetpos(f, &off);
2125 continue;
2126 }
7889f07b 2127 if (!check_range(p, "bsrange", &ul1, &ul2)) {
7f27790b
JA
2128 if (ul1 & 511)
2129 printf("bad min block size, must be a multiple of 512\n");
2130 else
2131 td->min_bs = ul1;
2132 if (ul2 & 511)
2133 printf("bad max block size, must be a multiple of 512\n");
2134 else
2135 td->max_bs = ul2;
7889f07b
JA
2136 fgetpos(f, &off);
2137 continue;
2138 }
8c033f93 2139 if (!check_strcnv(p, "bs", &ull)) {
7f27790b
JA
2140 if (ull & 511)
2141 printf("bad block size, must be a multiple of 512\n");
2142 else
2143 td->bs = ull;
8c033f93
JA
2144 fgetpos(f, &off);
2145 continue;
2146 }
02983297
JA
2147 if (!check_strcnv(p, "size", &td->file_size)) {
2148 fgetpos(f, &off);
2149 continue;
2150 }
2151 if (!check_strcnv(p, "offset", &td->file_offset)) {
2152 fgetpos(f, &off);
2153 continue;
2154 }
99c6704f 2155 if (!check_str(p, "mem", "malloc")) {
99c6704f
JA
2156 td->mem_type = MEM_MALLOC;
2157 fgetpos(f, &off);
2158 continue;
2159 }
2160 if (!check_str(p, "mem", "shm")) {
99c6704f
JA
2161 td->mem_type = MEM_SHM;
2162 fgetpos(f, &off);
2163 continue;
2164 }
43000118 2165 if (!strncmp(p, "sequential", 10)) {
f737299d 2166 td->sequential = 1;
7dd1389e
JA
2167 fgetpos(f, &off);
2168 continue;
2169 }
43000118 2170 if (!strncmp(p, "random", 6)) {
f737299d 2171 td->sequential = 0;
7dd1389e
JA
2172 fgetpos(f, &off);
2173 continue;
2174 }
43000118
JA
2175 if (!strncmp(p, "aio", 3)) {
2176 td->use_aio = 1;
2177 fgetpos(f, &off);
2178 continue;
2179 }
02983297
JA
2180 if (!strncmp(p, "create", 6)) {
2181 td->create_file = 1;
2182 fgetpos(f, &off);
2183 continue;
2184 }
2185 if (!strncmp(p, "overwrite", 9)) {
2186 td->overwrite = 1;
2187 fgetpos(f, &off);
2188 continue;
2189 }
98dd52d6
JA
2190 if (!strncmp(p, "exitall", 7)) {
2191 exitall_on_terminate = 1;
2192 fgetpos(f, &off);
2193 continue;
2194 }
2a81240d
JA
2195 if (!strncmp(p, "stonewall", 9)) {
2196 td->stonewall = 1;
2197 fgetpos(f, &off);
2198 continue;
2199 }
e6402082 2200 printf("Client%d: bad option %s\n",td->thread_number,p);
7dd1389e
JA
2201 }
2202 fsetpos(f, &off);
2203
4240cfa1
JA
2204 if (add_job(td, name, prioclass, prio))
2205 put_job(td);
7dd1389e
JA
2206 }
2207
2208 free(string);
2209 free(name);
fc7d63df 2210 fclose(f);
4240cfa1 2211 return 0;
7dd1389e
JA
2212}
2213
5c24b2c4 2214static int parse_options(int argc, char *argv[])
892199bd 2215{
01c4d8de 2216 int i;
892199bd
JA
2217
2218 for (i = 1; i < argc; i++) {
2219 char *parm = argv[i];
2220
2221 if (parm[0] != '-')
2222 break;
2223
2224 parm++;
2225 switch (*parm) {
2226 case 's':
2227 parm++;
47d45203 2228 def_thread.sequential = !!atoi(parm);
892199bd
JA
2229 break;
2230 case 'b':
2231 parm++;
47d45203
JA
2232 def_thread.bs = atoi(parm);
2233 def_thread.bs <<= 10;
2234 if (!def_thread.bs) {
4240cfa1 2235 printf("bad block size\n");
47d45203 2236 def_thread.bs = DEF_BS;
4240cfa1 2237 }
892199bd
JA
2238 break;
2239 case 't':
2240 parm++;
47d45203 2241 def_thread.timeout = atoi(parm);
892199bd 2242 break;
892199bd
JA
2243 case 'r':
2244 parm++;
2245 repeatable = !!atoi(parm);
2246 break;
02bdd9ba
JA
2247 case 'R':
2248 parm++;
2249 rate_quit = !!atoi(parm);
2250 break;
892199bd
JA
2251 case 'o':
2252 parm++;
47d45203 2253 def_thread.odirect = !!atoi(parm);
892199bd 2254 break;
7dd1389e
JA
2255 case 'f':
2256 if (i + 1 >= argc) {
2257 printf("-f needs file as arg\n");
2258 break;
2259 }
2260 ini_file = strdup(argv[i+1]);
a642279f 2261 i++;
7dd1389e 2262 break;
a0a9b35b
JA
2263 case 'l':
2264 write_lat_log = 1;
2265 break;
2266 case 'w':
2267 write_bw_log = 1;
2268 break;
892199bd 2269 default:
7dd1389e 2270 printf("bad option %s\n", argv[i]);
892199bd
JA
2271 break;
2272 }
2273 }
2274
892199bd
JA
2275 return i;
2276}
2277
3f39453a 2278static void print_thread_status(struct thread_data *td, int nr_running,
8dbff0b1 2279 int t_rate, int m_rate)
3f39453a 2280{
3f39453a
JA
2281 printf("Threads now running: %d", nr_running);
2282 if (m_rate || t_rate)
2283 printf(", commitrate %d/%dKiB/sec", t_rate, m_rate);
8dbff0b1
JA
2284 printf(" : [%s]\r", run_str);
2285 fflush(stdout);
3f39453a
JA
2286}
2287
40ef7f64
JA
2288static void check_str_update(struct thread_data *td, int n, int t, int m)
2289{
2290 char c = run_str[td->thread_number - 1];
2291
2292 if (td->runstate == td->old_runstate)
2293 return;
2294
2295 switch (td->runstate) {
2296 case TD_REAPED:
2297 c = '_';
2298 break;
f4bb2243
JA
2299 case TD_EXITED:
2300 c = 'E';
2301 break;
40ef7f64 2302 case TD_RUNNING:
af678352
JA
2303 if (td_read(td)) {
2304 if (td->sequential)
2305 c = 'R';
2306 else
2307 c = 'r';
2308 } else {
2309 if (td->sequential)
2310 c = 'W';
2311 else
2312 c = 'w';
2313 }
40ef7f64
JA
2314 break;
2315 case TD_VERIFYING:
2316 c = 'V';
2317 break;
2318 case TD_CREATED:
2319 c = 'C';
2320 break;
2321 case TD_NOT_CREATED:
2322 c = 'P';
2323 break;
2324 default:
2325 printf("state %d\n", td->runstate);
2326 }
2327
2328 run_str[td->thread_number - 1] = c;
2329 print_thread_status(td, n, t, m);
2330 td->old_runstate = td->runstate;
2331}
2332
213b446c 2333static void reap_threads(int *nr_running, int *t_rate, int *m_rate)
02bdd9ba 2334{
213b446c 2335 int i;
02bdd9ba 2336
3f39453a
JA
2337 /*
2338 * reap exited threads (TD_EXITED -> TD_REAPED)
2339 */
02bdd9ba
JA
2340 for (i = 0; i < thread_number; i++) {
2341 struct thread_data *td = &threads[i];
2342
40ef7f64
JA
2343 check_str_update(td, *nr_running, *t_rate, *m_rate);
2344
213b446c
JA
2345 if (td->runstate != TD_EXITED)
2346 continue;
02bdd9ba 2347
40ef7f64 2348 td_set_runstate(td, TD_REAPED);
213b446c
JA
2349 waitpid(td->pid, NULL, 0);
2350 (*nr_running)--;
2351 (*m_rate) -= td->ratemin;
2352 (*t_rate) -= td->rate;
40ef7f64 2353 check_str_update(td, *nr_running, *t_rate, *m_rate);
e6402082
JA
2354
2355 if (td->terminate)
2356 continue;
213b446c 2357 }
02bdd9ba
JA
2358}
2359
fc24389f
JA
2360static void run_threads(char *argv[])
2361{
be33abe4 2362 struct timeval genesis;
fc24389f
JA
2363 struct thread_data *td;
2364 unsigned long spent;
2a81240d 2365 int i, todo, nr_running, m_rate, t_rate, nr_started;
fc24389f 2366
fc24389f
JA
2367 printf("Starting %d threads\n", thread_number);
2368 fflush(stdout);
2369
7292613b
JA
2370 signal(SIGINT, sig_handler);
2371
fc24389f 2372 todo = thread_number;
02bdd9ba 2373 nr_running = 0;
2a81240d 2374 nr_started = 0;
213b446c 2375 m_rate = t_rate = 0;
fc24389f 2376
8bdcfab5
JA
2377 for (i = 0; i < thread_number; i++) {
2378 td = &threads[i];
2379
fc097bfe
JA
2380 if (!td->create_serialize)
2381 continue;
2382
8bdcfab5
JA
2383 /*
2384 * do file setup here so it happens sequentially,
2385 * we don't want X number of threads getting their
2386 * client data interspersed on disk
2387 */
2388 if (setup_file(td)) {
40ef7f64 2389 td_set_runstate(td, TD_REAPED);
8bdcfab5
JA
2390 todo--;
2391 }
2392 }
2393
2394 gettimeofday(&genesis, NULL);
2395
213b446c 2396 while (todo) {
3f39453a
JA
2397 /*
2398 * create threads (TD_NOT_CREATED -> TD_CREATED)
2399 */
fc24389f
JA
2400 for (i = 0; i < thread_number; i++) {
2401 td = &threads[i];
2402
02bdd9ba 2403 if (td->runstate != TD_NOT_CREATED)
fc24389f
JA
2404 continue;
2405
213b446c
JA
2406 /*
2407 * never got a chance to start, killed by other
2408 * thread for some reason
2409 */
2410 if (td->terminate) {
2411 todo--;
2412 continue;
2413 }
2414
fc24389f 2415 if (td->start_delay) {
be33abe4 2416 spent = mtime_since_now(&genesis);
fc24389f
JA
2417
2418 if (td->start_delay * 1000 > spent)
2419 continue;
2420 }
2421
2a81240d
JA
2422 if (td->stonewall && (nr_started || nr_running))
2423 continue;
2424
40ef7f64
JA
2425 td_set_runstate(td, TD_CREATED);
2426 check_str_update(td, nr_running, t_rate, m_rate);
fc24389f
JA
2427 sem_init(&startup_sem, 1, 1);
2428 todo--;
2a81240d 2429 nr_started++;
fc24389f
JA
2430
2431 if (fork())
2432 sem_wait(&startup_sem);
2433 else {
2434 thread_main(shm_id, i, argv);
2435 exit(0);
2436 }
2437 }
2438
3f39453a 2439 /*
e8457004 2440 * start created threads (TD_CREATED -> TD_RUNNING)
3f39453a 2441 */
fc24389f
JA
2442 for (i = 0; i < thread_number; i++) {
2443 struct thread_data *td = &threads[i];
2444
3f39453a
JA
2445 if (td->runstate != TD_CREATED)
2446 continue;
2447
40ef7f64 2448 td_set_runstate(td, TD_RUNNING);
3f39453a 2449 nr_running++;
2a81240d 2450 nr_started--;
3f39453a
JA
2451 m_rate += td->ratemin;
2452 t_rate += td->rate;
40ef7f64 2453 check_str_update(td, nr_running, t_rate, m_rate);
3f39453a 2454 sem_post(&td->mutex);
fc24389f
JA
2455 }
2456
e8457004
JA
2457 for (i = 0; i < thread_number; i++) {
2458 struct thread_data *td = &threads[i];
2459
2460 if (td->runstate == TD_RUNNING)
2461 run_str[td->thread_number - 1] = '+';
2462 else if (td->runstate == TD_VERIFYING)
2463 run_str[td->thread_number - 1] = 'V';
2464 else
2465 continue;
2466
40ef7f64 2467 check_str_update(td, nr_running, t_rate, m_rate);
e8457004
JA
2468 }
2469
213b446c 2470 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba 2471
fc24389f
JA
2472 if (todo)
2473 usleep(100000);
2474 }
02bdd9ba
JA
2475
2476 while (nr_running) {
213b446c 2477 reap_threads(&nr_running, &t_rate, &m_rate);
02bdd9ba
JA
2478 usleep(10000);
2479 }
fc24389f
JA
2480}
2481
8867c0a8 2482int setup_thread_area(void)
892199bd 2483{
8867c0a8
JA
2484 /*
2485 * 1024 is too much on some machines, scale max_jobs if
2486 * we get a failure that looks like too large a shm segment
2487 */
2488 do {
2489 int s = max_jobs * sizeof(struct thread_data);
18e0b78c 2490
8867c0a8
JA
2491 shm_id = shmget(0, s, IPC_CREAT | 0600);
2492 if (shm_id != -1)
2493 break;
2494 if (errno != EINVAL) {
2495 perror("shmget");
2496 break;
2497 }
2498
2499 max_jobs >>= 1;
d4fac444 2500 } while (max_jobs);
8867c0a8
JA
2501
2502 if (shm_id == -1)
892199bd 2503 return 1;
892199bd
JA
2504
2505 threads = shmat(shm_id, NULL, 0);
8867c0a8 2506 if (threads == (void *) -1) {
86184d14
JA
2507 perror("shmat");
2508 return 1;
2509 }
892199bd
JA
2510
2511 atexit(free_shm);
8867c0a8
JA
2512 return 0;
2513}
2514
2515int main(int argc, char *argv[])
2516{
8c033f93 2517 static unsigned long max_run[2], min_run[2];
57d753e3 2518 static unsigned long max_bw[2], min_bw[2];
22334044 2519 static unsigned long io_mb[2], agg[2];
8867c0a8
JA
2520 int i;
2521
2522 if (setup_thread_area())
2523 return 1;
892199bd 2524
47d45203 2525 if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &def_thread.cpumask) == -1) {
4240cfa1
JA
2526 perror("sched_getaffinity");
2527 return 1;
2528 }
2529
47d45203
JA
2530 /*
2531 * fill globals
2532 */
2533 def_thread.ddir = DDIR_READ;
2534 def_thread.bs = DEF_BS;
7889f07b
JA
2535 def_thread.min_bs = -1;
2536 def_thread.max_bs = -1;
02983297 2537 def_thread.odirect = DEF_ODIRECT;
47d45203 2538 def_thread.ratecycle = DEF_RATE_CYCLE;
02983297 2539 def_thread.sequential = DEF_SEQUENTIAL;
47d45203 2540 def_thread.timeout = DEF_TIMEOUT;
02983297
JA
2541 def_thread.create_file = DEF_CREATE;
2542 def_thread.overwrite = DEF_OVERWRITE;
b95799ca 2543 def_thread.invalidate_cache = DEF_INVALIDATE;
99c6704f
JA
2544 def_thread.sync_io = DEF_SYNCIO;
2545 def_thread.mem_type = MEM_MALLOC;
1d035750 2546 def_thread.bw_avg_time = DEF_BWAVGTIME;
fc097bfe
JA
2547 def_thread.create_serialize = DEF_CREATE_SER;
2548 def_thread.create_fsync = DEF_CREATE_FSYNC;
b6794fbf 2549 def_thread.loops = DEF_LOOPS;
cfc702bd 2550 def_thread.verify = DEF_VERIFY;
2a81240d 2551 def_thread.stonewall = DEF_STONEWALL;
47d45203 2552
892199bd 2553 i = parse_options(argc, argv);
7dd1389e 2554
5961d92c
JA
2555 if (!ini_file) {
2556 printf("Need job file\n");
2557 return 1;
2558 }
2559
2560 if (parse_jobs_ini(ini_file))
2561 return 1;
7dd1389e 2562
4240cfa1
JA
2563 if (!thread_number) {
2564 printf("Nothing to do\n");
2565 return 1;
2566 }
7dd1389e 2567
fc24389f 2568 run_threads(argv);
892199bd 2569
892199bd
JA
2570 min_bw[0] = min_run[0] = ~0UL;
2571 min_bw[1] = min_run[1] = ~0UL;
22334044
JA
2572 io_mb[0] = io_mb[1] = 0;
2573 agg[0] = agg[1] = 0;
892199bd
JA
2574 for (i = 0; i < thread_number; i++) {
2575 struct thread_data *td = &threads[i];
2576 unsigned long bw = 0;
2577
22334044
JA
2578 if (!td->error) {
2579 if (td->runtime < min_run[td->ddir])
2580 min_run[td->ddir] = td->runtime;
2581 if (td->runtime > max_run[td->ddir])
2582 max_run[td->ddir] = td->runtime;
892199bd 2583
892199bd 2584 if (td->runtime)
49d2caab 2585 bw = td->io_bytes / td->runtime;
22334044
JA
2586 if (bw < min_bw[td->ddir])
2587 min_bw[td->ddir] = bw;
2588 if (bw > max_bw[td->ddir])
2589 max_bw[td->ddir] = bw;
2590
49d2caab 2591 io_mb[td->ddir] += td->io_bytes >> 20;
892199bd
JA
2592 }
2593
2594 show_thread_status(td);
2595 }
22334044
JA
2596
2597 if (max_run[0])
49d2caab 2598 agg[0] = (io_mb[0] * 1024 * 1000) / max_run[0];
22334044 2599 if (max_run[1])
49d2caab 2600 agg[1] = (io_mb[1] * 1024 * 1000) / max_run[1];
22334044 2601
57d753e3 2602 printf("\nRun status:\n");
892199bd 2603 if (max_run[DDIR_READ])
22334044 2604 printf(" READ: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", io_mb[0], agg[0], min_bw[0], max_bw[0], min_run[0], max_run[0]);
892199bd 2605 if (max_run[DDIR_WRITE])
22334044 2606 printf(" WRITE: io=%luMiB, aggrb=%lu, minb=%lu, maxb=%lu, mint=%lumsec, maxt=%lumsec\n", io_mb[1], agg[1], min_bw[1], max_bw[1], min_run[1], max_run[1]);
fc24389f 2607
892199bd
JA
2608 return 0;
2609}