btreplay: fix sched_{set|get}affinity
[blktrace.git] / btreplay / btreplay.c
CommitLineData
d47a3fec
AB
1/*
2 * Blktrace replay utility - Play traces back
3 *
4 * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21static char build_date[] = __DATE__ " at "__TIME__;
22
23#include <assert.h>
24#include <errno.h>
25#include <fcntl.h>
26#include <libaio.h>
27#include <pthread.h>
28#include <sched.h>
29#include <signal.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <time.h>
34#include <unistd.h>
35#include <sys/param.h>
36#include <sys/stat.h>
37#include <sys/time.h>
38#include <sys/types.h>
39#include <dirent.h>
65a7043b 40#include <stdarg.h>
d47a3fec
AB
41
42#if !defined(_GNU_SOURCE)
43# define _GNU_SOURCE
44#endif
45#include <getopt.h>
46
47#include "list.h"
48#include "btrecord.h"
49
50/*
51 * ========================================================================
52 * ==== STRUCTURE DEFINITIONS =============================================
53 * ========================================================================
54 */
55
56/**
57 * Each device map has one of these:
58 *
59 * @head: Linked on to map_devs
60 * @from_dev: Device name as seen on recorded system
61 * @to_dev: Device name to be used on replay system
62 */
63struct map_dev {
64 struct list_head head;
65 char *from_dev, *to_dev;
66};
67
68/**
69 * Each device name specified has one of these (until threads are created)
70 *
71 * @head: Linked onto input_devs
72 * @devnm: Device name -- 'sd*'
73 */
74struct dev_info {
75 struct list_head head;
76 char *devnm;
77};
78
79/*
80 * Per input file information
81 *
82 * @head: Used to link up on input_files
83 * @free_iocbs: List of free iocb's available for use
84 * @used_iocbs: List of iocb's currently outstanding
85 * @mutex: Mutex used with condition variable to protect volatile values
86 * @cond: Condition variable used when waiting on a volatile value change
87 * @naios_out: Current number of AIOs outstanding on this context
88 * @naios_free: Number of AIOs on the free list (short cut for list_len)
89 * @send_wait: Boolean: When true, the sub thread is waiting on free IOCBs
90 * @reap_wait: Boolean: When true, the rec thread is waiting on used IOCBs
91 * @send_done: Boolean: When true, the sub thread has completed work
92 * @reap_done: Boolean: When true, the rec thread has completed work
93 * @sub_thread: Thread used to submit IOs.
94 * @rec_thread: Thread used to reclaim IOs.
95 * @ctx: IO context
96 * @devnm: Copy of the device name being managed by this thread
97 * @file_name: Full name of the input file
98 * @cpu: CPU this thread is pinned to
99 * @ifd: Input file descriptor
100 * @ofd: Output file descriptor
101 * @iterations: Remaining iterations to process
102 * @vfp: For verbose dumping of actions performed
103 */
104struct thr_info {
105 struct list_head head, free_iocbs, used_iocbs;
106 pthread_mutex_t mutex;
107 pthread_cond_t cond;
108 volatile long naios_out, naios_free;
109 volatile int send_wait, reap_wait, send_done, reap_done;
110 pthread_t sub_thread, rec_thread;
111 io_context_t ctx;
112 char *devnm, *file_name;
113 int cpu, ifd, ofd, iterations;
114 FILE *vfp;
115};
116
117/*
118 * Every Asynchronous IO used has one of these (naios per file/device).
119 *
120 * @iocb: IOCB sent down via io_submit
121 * @head: Linked onto file_list.free_iocbs or file_list.used_iocbs
122 * @tip: Pointer to per-thread information this IO is associated with
123 * @nbytes: Number of bytes in buffer associated with iocb
124 */
125struct iocb_pkt {
126 struct iocb iocb;
127 struct list_head head;
128 struct thr_info *tip;
129 int nbytes;
130};
131
132/*
133 * ========================================================================
134 * ==== GLOBAL VARIABLES ==================================================
135 * ========================================================================
136 */
137
138static volatile int signal_done = 0; // Boolean: Signal'ed, need to quit
139
140static char *ibase = "replay"; // Input base name
141static char *idir = "."; // Input directory base
142static int cpus_to_use = -1; // Number of CPUs to use
143static int def_iterations = 1; // Default number of iterations
144static int naios = 512; // Number of AIOs per thread
145static int ncpus = 0; // Number of CPUs in the system
146static int verbose = 0; // Boolean: Output some extra info
147static int write_enabled = 0; // Boolean: Enable writing
148static __u64 genesis = ~0; // Earliest time seen
149static __u64 rgenesis; // Our start time
150static size_t pgsize; // System Page size
151static int nb_sec = 512; // Number of bytes per sector
152static LIST_HEAD(input_devs); // List of devices to handle
153static LIST_HEAD(input_files); // List of input files to handle
154static LIST_HEAD(map_devs); // List of device maps
155static int nfiles = 0; // Number of files to handle
156static int no_stalls = 0; // Boolean: Disable pre-stalls
4a7968cc 157static unsigned acc_factor = 1; // Int: Acceleration factor
d47a3fec
AB
158static int find_records = 0; // Boolean: Find record files auto
159
160/*
161 * Variables managed under control of condition variables.
162 *
163 * n_reclaims_done: Counts number of reclaim threads that have completed.
164 * n_replays_done: Counts number of replay threads that have completed.
165 * n_replays_ready: Counts number of replay threads ready to start.
166 * n_iters_done: Counts number of replay threads done one iteration.
167 * iter_start: Starts an iteration for the replay threads.
168 */
169static volatile int n_reclaims_done = 0;
170static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
171static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;
172
173static volatile int n_replays_done = 0;
174static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
175static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;
176
177static volatile int n_replays_ready = 0;
178static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
179static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;
180
181static volatile int n_iters_done = 0;
182static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
183static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;
184
185static volatile int iter_start = 0;
186static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
187static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
188
189/*
190 * ========================================================================
191 * ==== FORWARD REFERENCES ================================================
192 * ========================================================================
193 */
194
195static void *replay_sub(void *arg);
196static void *replay_rec(void *arg);
197static char usage_str[];
198
199/*
200 * ========================================================================
201 * ==== INLINE ROUTINES ===================================================
202 * ========================================================================
203 */
204
/*
 * Exit codes handed to fatal().
 */
#define ERR_ARGS	1
#define ERR_SYSCALL	2

/*
 * fatal - Print an error and terminate the program
 * @errstring: When non-NULL, handed to perror() before the message
 * @exitval: Value passed to exit()
 * @fmt: printf-style format (plus arguments) written to stderr
 *
 * Never returns.
 */
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
d47a3fec
AB
227
/*
 * du64_to_sec - Whole seconds contained in a nanosecond count
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const long long unsigned ns_per_sec = 1000ULL * 1000ULL * 1000ULL;

	return (long long unsigned)du64 / ns_per_sec;
}
232
/*
 * du64_to_nsec - Sub-second remainder (in nanoseconds) of a nanosecond count
 *
 * The value is reinterpreted as signed and its magnitude is used.
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return magnitude % (1000 * 1000 * 1000);
}
237
/**
 * min - Return the smaller of two integers
 */
static inline int min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
245
/**
 * minl - Return the smaller of two longs
 */
static inline long minl(long a, long b)
{
	if (a < b)
		return a;
	return b;
}
253
254/**
255 * usage - Display usage string and version
256 */
257static inline void usage(void)
258{
259 fprintf(stderr, "Usage: btreplay -- version %s\n%s",
260 my_btversion, usage_str);
261}
262
263/**
264 * is_send_done - Returns true if sender should quit early
265 * @tip: Per-thread information
266 */
267static inline int is_send_done(struct thr_info *tip)
268{
269 return signal_done || tip->send_done;
270}
271
272/**
273 * is_reap_done - Returns true if reaper should quit early
274 * @tip: Per-thread information
275 */
276static inline int is_reap_done(struct thr_info *tip)
277{
278 return tip->send_done && tip->naios_out == 0;
279}
280
/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts: Time to convert
 */
#define NS_TICKS		((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	__u64 secs = (__u64)(ts->tv_sec);
	__u64 nsecs = (__u64)(ts->tv_nsec);

	return (secs * NS_TICKS) + nsecs;
}
289
/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time to convert (seconds + microseconds)
 *
 * Seconds are scaled by 1e9 and microseconds by 1e3 so that the result
 * is in the same unit as ts2ns().
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	__u64 secs = (__u64)(tp->tv_sec);
	__u64 usecs = (__u64)(tp->tv_usec);

	return (secs * (__u64)1000 * (__u64)1000 * (__u64)1000) +
	       (usecs * (__u64)1000);
}
297
298/**
299 * touch_memory - Force physical memory to be allocating it
300 *
301 * For malloc()ed memory we need to /touch/ it to make it really
302 * exist. Otherwise, for write's (to storage) things may not work
303 * as planned - we see Linux just use a single area to /read/ from
304 * (as there isn't any memory that has been associated with the
305 * allocated virtual addresses yet).
306 */
307static inline void touch_memory(char *buf, size_t bsize)
308{
309#if defined(PREP_BUFS)
310 memset(buf, 0, bsize);
311#else
312 size_t i;
313
314 for (i = 0; i < bsize; i += pgsize)
315 buf[i] = 0;
316#endif
317}
318
319/**
320 * buf_alloc - Returns a page-aligned buffer of the specified size
321 * @nbytes: Number of bytes to allocate
322 */
323static inline void *buf_alloc(size_t nbytes)
324{
325 void *buf;
326
327 if (posix_memalign(&buf, pgsize, nbytes)) {
328 fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n");
329 /*NOTREACHED*/
330 }
331
332 return buf;
333}
334
335/**
336 * gettime - Returns current time
337 */
338static inline __u64 gettime(void)
339{
340 static int use_clock_gettime = -1; // Which clock to use
341
342 if (use_clock_gettime < 0) {
343 use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0;
344 if (use_clock_gettime) {
345 struct timespec ts = {
346 .tv_sec = 0,
347 .tv_nsec = 0
348 };
349 clock_settime(CLOCK_MONOTONIC, &ts);
350 }
351 }
352
353 if (use_clock_gettime) {
354 struct timespec ts;
355 clock_gettime(CLOCK_MONOTONIC, &ts);
356 return ts2ns(&ts);
357 }
358 else {
359 struct timeval tp;
360 gettimeofday(&tp, NULL);
361 return tv2ns(&tp);
362 }
363}
364
365/**
366 * setup_signal - Set up a signal handler for the specified signum
367 */
368static inline void setup_signal(int signum, sighandler_t handler)
369{
370 if (signal(signum, handler) == SIG_ERR) {
371 fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n",
372 signum);
373 /*NOTREACHED*/
374 }
375}
376
377/*
378 * ========================================================================
379 * ==== CONDITION VARIABLE ROUTINES =======================================
380 * ========================================================================
381 */
382
/**
 * __set_cv - Increments a variable under condition variable control.
 * @pmp: Pointer to the associated mutex
 * @pcp: Pointer to the associated condition variable
 * @vp: Pointer to the variable being incremented
 * @mxv: Max value for variable (Used only when ASSERTS are on)
 *
 * The increment and the signal both happen with @pmp held.
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);

	assert(*vp < mxv);
	(*vp)++;
	pthread_cond_signal(pcp);

	pthread_mutex_unlock(pmp);
}
400
/**
 * __wait_cv - Waits for a variable under cond var control to hit a value
 * @pmp: Pointer to the associated mutex
 * @pcp: Pointer to the associated condition variable
 * @vp: Pointer to the variable being incremented
 * @mxv: Value to wait for
 *
 * Once *@vp has reached @mxv it is reset to zero before returning.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);

	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;

	pthread_mutex_unlock(pmp);
}
417
418static inline void set_reclaim_done(void)
419{
420 __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
421 nfiles);
422}
423
424static inline void wait_reclaims_done(void)
425{
426 __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
427 nfiles);
428}
429
430static inline void set_replay_ready(void)
431{
432 __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
433 nfiles);
434}
435
436static inline void wait_replays_ready(void)
437{
438 __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
439 nfiles);
440}
441
442static inline void set_replay_done(void)
443{
444 __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
445 nfiles);
446}
447
448static inline void wait_replays_done(void)
449{
450 __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
451 nfiles);
452}
453
454static inline void set_iter_done(void)
455{
456 __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
457 nfiles);
458}
459
460static inline void wait_iters_done(void)
461{
462 __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
463 nfiles);
464}
465
466/**
467 * wait_iter_start - Wait for an iteration to start
468 *
469 * This is /slightly/ different: we are waiting for a value to become
470 * non-zero, and then we decrement it and go on.
471 */
472static inline void wait_iter_start(void)
473{
474 pthread_mutex_lock(&iter_start_mutex);
475 while (iter_start == 0)
476 pthread_cond_wait(&iter_start_cond, &iter_start_mutex);
477 assert(1 <= iter_start && iter_start <= nfiles);
478 iter_start--;
479 pthread_mutex_unlock(&iter_start_mutex);
480}
481
482/**
483 * start_iter - Start an iteration at the replay thread level
484 */
485static inline void start_iter(void)
486{
487 pthread_mutex_lock(&iter_start_mutex);
488 assert(iter_start == 0);
489 iter_start = nfiles;
490 pthread_cond_broadcast(&iter_start_cond);
491 pthread_mutex_unlock(&iter_start_mutex);
492}
493
494/*
495 * ========================================================================
496 * ==== CPU RELATED ROUTINES ==============================================
497 * ========================================================================
498 */
499
500/**
501 * get_ncpus - Sets up the global 'ncpus' value
502 */
503static void get_ncpus(void)
504{
80c4041b
AA
505#ifdef _SC_NPROCESSORS_ONLN
506 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
fb697494 507#else
0a915aab
NZ
508 int nrcpus = 4096;
509 cpu_set_t * cpus;
510
511realloc:
512 cpus = CPU_ALLOC(nrcpus);
513 size = CPU_ALLOC_SIZE(nrcpus);
514 CPU_ZERO_S(size, cpus);
515
7338236a 516 if (sched_getaffinity(0, size, cpus)) {
0a915aab
NZ
517 if( errno == EINVAL && nrcpus < (4096<<4) ) {
518 CPU_FREE(cpus);
2564a602 519 nrcpus <<= 1;
0a915aab
NZ
520 goto realloc;
521 }
d47a3fec
AB
522 fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n");
523 /*NOTREACHED*/
524 }
525
0c2df13d
NZ
526 ncpus = -1;
527 for (last_cpu = 0; last_cpu < CPU_SETSIZE && CPU_ISSET(last_cpu, &cpus); last_cpu++)
528 if (CPU_ISSET( last_cpu, &cpus) )
529 ncpus = last_cpu;
530 ncpus++;
0a915aab 531 CPU_FREE(cpus);
fb697494 532#endif
d47a3fec
AB
533 if (ncpus == 0) {
534 fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n");
535 /*NOTREACHED*/
536 }
537}
538
539/**
540 * pin_to_cpu - Pin this thread to a specific CPU
541 * @tip: Thread information
542 */
543static void pin_to_cpu(struct thr_info *tip)
544{
0a915aab
NZ
545 cpu_set_t *cpus;
546 size_t size;
547
548 cpus = CPU_ALLOC(ncpus);
549 size = CPU_ALLOC_SIZE(ncpus);
d47a3fec
AB
550
551 assert(0 <= tip->cpu && tip->cpu < ncpus);
552
f6541f75 553 CPU_ZERO_S(size, cpus);
0a915aab 554 CPU_SET_S(tip->cpu, size, cpus);
7338236a 555 if (sched_setaffinity(0, size, cpus)) {
d47a3fec
AB
556 fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n");
557 /*NOTREACHED*/
558 }
7338236a 559 assert(tip->cpu == sched_getcpu());
d47a3fec
AB
560
561 if (verbose > 1) {
562 int i;
0a915aab 563 cpu_set_t *now = CPU_ALLOC(ncpus);
d47a3fec 564
7338236a 565 (void)sched_getaffinity(0, size, now);
d47a3fec
AB
566 fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu);
567 for (i = 0; i < ncpus; i++)
0a915aab 568 fprintf(tip->vfp, "%1d", CPU_ISSET_S(i, size, now));
d47a3fec
AB
569 fprintf(tip->vfp, "\n");
570 }
571}
572
573/*
574 * ========================================================================
575 * ==== INPUT DEVICE HANDLERS =============================================
576 * ========================================================================
577 */
578
579/**
580 * add_input_dev - Add a device ('sd*') to the list of devices to handle
581 */
582static void add_input_dev(char *devnm)
583{
584 struct list_head *p;
585 struct dev_info *dip;
586
587 __list_for_each(p, &input_devs) {
588 dip = list_entry(p, struct dev_info, head);
589 if (strcmp(dip->devnm, devnm) == 0)
590 return;
591 }
592
593 dip = malloc(sizeof(*dip));
594 dip->devnm = strdup(devnm);
595 list_add_tail(&dip->head, &input_devs);
596}
597
598/**
599 * rem_input_dev - Remove resources associated with this device
600 */
601static void rem_input_dev(struct dev_info *dip)
602{
603 list_del(&dip->head);
604 free(dip->devnm);
605 free(dip);
606}
607
608static void find_input_devs(char *idir)
609{
610 struct dirent *ent;
611 DIR *dir = opendir(idir);
612
613 if (dir == NULL) {
614 fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
615 /*NOTREACHED*/
616 }
617
618 while ((ent = readdir(dir)) != NULL) {
6ca1e530 619 char *p, *dsf;
d47a3fec
AB
620
621 if (strstr(ent->d_name, ".replay.") == NULL)
622 continue;
623
624 dsf = strdup(ent->d_name);
625 p = index(dsf, '.');
626 assert(p != NULL);
627 *p = '\0';
628 add_input_dev(dsf);
629 free(dsf);
630 }
631
632 closedir(dir);
633}
634
635/*
636 * ========================================================================
637 * ==== MAP DEVICE INTERFACES =============================================
638 * ========================================================================
639 */
640
641/**
642 * read_map_devs - Read in a set of device mapping from the provided file.
643 * @file_name: File containing device maps
644 *
645 * We support the notion of multiple such files being specifed on the cmd line
646 */
647static void read_map_devs(char *file_name)
648{
649 FILE *fp;
dd093eb1 650 char from_dev[256], to_dev[256];
d47a3fec
AB
651
652 fp = fopen(file_name, "r");
653 if (!fp) {
654 fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n");
655 /*NOTREACHED*/
656 }
657
dd093eb1 658 while (fscanf(fp, "%s %s", from_dev, to_dev) == 2) {
d47a3fec
AB
659 struct map_dev *mdp = malloc(sizeof(*mdp));
660
661 mdp->from_dev = from_dev;
662 mdp->to_dev = to_dev;
663 list_add_tail(&mdp->head, &map_devs);
664 }
665
666 fclose(fp);
667}
668
669/**
670 * release_map_devs - Release resources associated with device mappings.
671 */
672static void release_map_devs(void)
673{
674 struct list_head *p, *q;
675
676 list_for_each_safe(p, q, &map_devs) {
677 struct map_dev *mdp = list_entry(p, struct map_dev, head);
678
679 list_del(&mdp->head);
680
681 free(mdp->from_dev);
682 free(mdp->to_dev);
683 free(mdp);
684 }
685}
686
687/**
688 * map_dev - Return the mapped device for that specified
689 * @from_dev: Device name as seen on recorded system
690 *
691 * Note: If there is no such mapping, we return the same name.
692 */
693static char *map_dev(char *from_dev)
694{
695 struct list_head *p;
696
697 __list_for_each(p, &map_devs) {
698 struct map_dev *mdp = list_entry(p, struct map_dev, head);
699
700 if (strcmp(from_dev, mdp->from_dev) == 0)
701 return mdp->to_dev;
702 }
703
704 return from_dev;
705}
706
707/*
708 * ========================================================================
709 * ==== IOCB MANAGEMENT ROUTINES ==========================================
710 * ========================================================================
711 */
712
713/**
714 * iocb_init - Initialize the fields of an IOCB
715 * @tip: Per-thread information
716 * iocbp: IOCB pointer to update
717 */
718static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp)
719{
720 iocbp->tip = tip;
721 iocbp->nbytes = 0;
722 iocbp->iocb.u.c.buf = NULL;
723}
724
725/**
726 * iocb_setup - Set up an iocb with this AIOs information
727 * @iocbp: IOCB pointer to update
728 * @rw: Direction (0 == write, 1 == read)
729 * @n: Number of bytes to transfer
730 * @off: Offset (in bytes)
731 */
732static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off)
733{
734 char *buf;
735 struct iocb *iop = &iocbp->iocb;
736
737 assert(rw == 0 || rw == 1);
738 assert(0 < n && (n % nb_sec) == 0);
739 assert(0 <= off);
740
741 if (iocbp->nbytes) {
742 if (iocbp->nbytes >= n) {
743 buf = iop->u.c.buf;
744 goto prep;
745 }
746
747 assert(iop->u.c.buf);
748 free(iop->u.c.buf);
749 }
750
751 buf = buf_alloc(n);
752 iocbp->nbytes = n;
753
754prep:
755 if (rw)
756 io_prep_pread(iop, iocbp->tip->ofd, buf, n, off);
757 else {
758 assert(write_enabled);
759 io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off);
760 touch_memory(buf, n);
761 }
762
763 iop->data = iocbp;
764}
765
766/*
767 * ========================================================================
768 * ==== PER-THREAD SET UP & TEAR DOWN =====================================
769 * ========================================================================
770 */
771
772/**
773 * tip_init - Per thread initialization function
774 */
775static void tip_init(struct thr_info *tip)
776{
777 int i;
778
779 INIT_LIST_HEAD(&tip->free_iocbs);
780 INIT_LIST_HEAD(&tip->used_iocbs);
781
782 pthread_mutex_init(&tip->mutex, NULL);
783 pthread_cond_init(&tip->cond, NULL);
784
785 if (io_setup(naios, &tip->ctx)) {
786 fatal("io_setup", ERR_SYSCALL, "io_setup failed\n");
787 /*NOTREACHED*/
788 }
789
790 tip->ofd = -1;
791 tip->naios_out = 0;
792 tip->send_done = tip->reap_done = 0;
793 tip->send_wait = tip->reap_wait = 0;
794
795 memset(&tip->sub_thread, 0, sizeof(tip->sub_thread));
796 memset(&tip->rec_thread, 0, sizeof(tip->rec_thread));
797
798 for (i = 0; i < naios; i++) {
799 struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp));
800
801 iocb_init(tip, iocbp);
802 list_add_tail(&iocbp->head, &tip->free_iocbs);
803 }
804 tip->naios_free = naios;
805
806 if (verbose > 1) {
807 char fn[MAXPATHLEN];
808
809 sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase,
810 tip->cpu);
811 tip->vfp = fopen(fn, "w");
812 if (!tip->vfp) {
813 fatal(fn, ERR_SYSCALL, "Failed to open report\n");
814 /*NOTREACHED*/
815 }
816
817 setlinebuf(tip->vfp);
818 }
819
820 if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) {
821 fatal("pthread_create", ERR_SYSCALL,
822 "thread create failed\n");
823 /*NOTREACHED*/
824 }
825
826 if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) {
827 fatal("pthread_create", ERR_SYSCALL,
828 "thread create failed\n");
829 /*NOTREACHED*/
830 }
831}
832
833/**
834 * tip_release - Release resources associated with this thread
835 */
836static void tip_release(struct thr_info *tip)
837{
838 struct list_head *p, *q;
839
840 assert(tip->send_done);
841 assert(tip->reap_done);
842 assert(list_len(&tip->used_iocbs) == 0);
843 assert(tip->naios_free == naios);
844
845 if (pthread_join(tip->sub_thread, NULL)) {
846 fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n");
847 /*NOTREACHED*/
848 }
849 if (pthread_join(tip->rec_thread, NULL)) {
850 fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n");
851 /*NOTREACHED*/
852 }
853
854 io_destroy(tip->ctx);
855
856 list_splice(&tip->used_iocbs, &tip->free_iocbs);
857 list_for_each_safe(p, q, &tip->free_iocbs) {
858 struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head);
859
860 list_del(&iocbp->head);
861 if (iocbp->nbytes)
862 free(iocbp->iocb.u.c.buf);
863 free(iocbp);
864 }
865
866 pthread_cond_destroy(&tip->cond);
867 pthread_mutex_destroy(&tip->mutex);
868}
869
870/**
871 * add_input_file - Allocate and initialize per-input file structure
872 * @cpu: CPU for this file
873 * @devnm: Device name for this file
874 * @file_name: Fully qualifed input file name
875 */
876static void add_input_file(int cpu, char *devnm, char *file_name)
877{
878 struct stat buf;
879 struct io_file_hdr hdr;
880 struct thr_info *tip = buf_alloc(sizeof(*tip));
881 __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
882
883 assert(0 <= cpu && cpu < ncpus);
884
885 memset(&hdr, 0, sizeof(hdr));
886 memset(tip, 0, sizeof(*tip));
887 tip->cpu = cpu % cpus_to_use;
888 tip->iterations = def_iterations;
889
890 tip->ifd = open(file_name, O_RDONLY);
891 if (tip->ifd < 0) {
892 fatal(file_name, ERR_ARGS, "Unable to open\n");
893 /*NOTREACHED*/
894 }
895 if (fstat(tip->ifd, &buf) < 0) {
896 fatal(file_name, ERR_SYSCALL, "fstat failed\n");
897 /*NOTREACHED*/
898 }
899 if (buf.st_size < (off_t)sizeof(hdr)) {
900 if (verbose)
901 fprintf(stderr, "\t%s empty\n", file_name);
902 goto empty_file;
903 }
904
905 if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
906 fatal(file_name, ERR_ARGS, "Header read failed\n");
907 /*NOTREACHED*/
908 }
909
910 if (hdr.version != my_version) {
911 fprintf(stderr, "%llx %llx %llx %llx\n",
912 (long long unsigned)hdr.version,
913 (long long unsigned)hdr.genesis,
914 (long long unsigned)hdr.nbunches,
915 (long long unsigned)hdr.total_pkts);
916 fatal(NULL, ERR_ARGS,
917 "BT version mismatch: %lx versus my %lx\n",
918 (long)hdr.version, (long)my_version);
919
920 }
921
922 if (hdr.nbunches == 0) {
923empty_file:
924 close(tip->ifd);
925 free(tip);
926 return;
927 }
928
929 if (hdr.genesis < genesis) {
930 if (verbose > 1)
931 fprintf(stderr, "Setting genesis to %llu.%llu\n",
932 du64_to_sec(hdr.genesis),
933 du64_to_nsec(hdr.genesis));
934 genesis = hdr.genesis;
935 }
936
937 tip->devnm = strdup(devnm);
938 tip->file_name = strdup(file_name);
939
940 list_add_tail(&tip->head, &input_files);
941
942 if (verbose)
943 fprintf(stderr, "Added %s %llu\n", file_name,
944 (long long)hdr.genesis);
945}
946
947/**
948 * rem_input_file - Release resources associated with an input file
949 * @tip: Per-input file information
950 */
951static void rem_input_file(struct thr_info *tip)
952{
953 list_del(&tip->head);
954
955 tip_release(tip);
956
957 close(tip->ofd);
958 close(tip->ifd);
959 free(tip->file_name);
960 free(tip->devnm);
961 free(tip);
962}
963
964/**
965 * rem_input_files - Remove all input files
966 */
967static void rem_input_files(void)
968{
969 struct list_head *p, *q;
970
971 list_for_each_safe(p, q, &input_files) {
972 rem_input_file(list_entry(p, struct thr_info, head));
973 }
974}
975
976/**
977 * __find_input_files - Find input files associated with this device (per cpu)
978 */
979static void __find_input_files(struct dev_info *dip)
980{
981 int cpu = 0;
982
983 for (;;) {
984 char full_name[MAXPATHLEN];
985
986 sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu);
987 if (access(full_name, R_OK) != 0)
988 break;
989
990 add_input_file(cpu, dip->devnm, full_name);
991 cpu++;
992 }
993
994 if (!cpu) {
995 fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm);
996 /*NOTREACHED*/
997 }
998
999 rem_input_dev(dip);
1000}
1001
1002
1003/**
1004 * find_input_files - Find input files for all devices
1005 */
1006static void find_input_files(void)
1007{
1008 struct list_head *p, *q;
1009
1010 list_for_each_safe(p, q, &input_devs) {
1011 __find_input_files(list_entry(p, struct dev_info, head));
1012 }
1013}
1014
1015/*
1016 * ========================================================================
1017 * ==== RECLAIM ROUTINES ==================================================
1018 * ========================================================================
1019 */
1020
1021/**
1022 * reap_wait_aios - Wait for and return number of outstanding AIOs
1023 *
1024 * Will return 0 if we are done
1025 */
1026static int reap_wait_aios(struct thr_info *tip)
1027{
1028 int naios = 0;
1029
1030 if (!is_reap_done(tip)) {
1031 pthread_mutex_lock(&tip->mutex);
1032 while (tip->naios_out == 0) {
1033 tip->reap_wait = 1;
1034 if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1035 fatal("pthread_cond_wait", ERR_SYSCALL,
1036 "nfree_current cond wait failed\n");
1037 /*NOTREACHED*/
1038 }
1039 }
1040 naios = tip->naios_out;
1041 pthread_mutex_unlock(&tip->mutex);
1042 }
1043 assert(is_reap_done(tip) || naios > 0);
1044
1045 return is_reap_done(tip) ? 0 : naios;
1046}
1047
1048/**
1049 * reclaim_ios - Reclaim AIOs completed, recycle IOCBs
1050 * @tip: Per-thread information
1051 * @naios_out: Number of AIOs we have outstanding (min)
1052 */
1053static void reclaim_ios(struct thr_info *tip, long naios_out)
1054{
1055 long i, ndone;
1056 struct io_event *evp, events[naios_out];
1057
1058again:
1059 assert(naios > 0);
1060 for (;;) {
1061 ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL);
1062 if (ndone > 0)
1063 break;
1064
1065 if (errno && errno != EINTR) {
1066 fatal("io_getevents", ERR_SYSCALL,
1067 "io_getevents failed\n");
1068 /*NOTREACHED*/
1069 }
1070 }
1071 assert(0 < ndone && ndone <= naios_out);
1072
1073 pthread_mutex_lock(&tip->mutex);
1074 for (i = 0, evp = events; i < ndone; i++, evp++) {
1075 struct iocb_pkt *iocbp = evp->data;
1076
1077 if (evp->res != iocbp->iocb.u.c.nbytes) {
1078 fatal(NULL, ERR_SYSCALL,
1079 "Event failure %ld/%ld\t(%ld + %ld)\n",
1080 (long)evp->res, (long)evp->res2,
1081 (long)iocbp->iocb.u.c.offset / nb_sec,
1082 (long)iocbp->iocb.u.c.nbytes / nb_sec);
1083 /*NOTREACHED*/
1084 }
1085
1086 list_move_tail(&iocbp->head, &tip->free_iocbs);
1087 }
1088
1089 tip->naios_free += ndone;
1090 tip->naios_out -= ndone;
1091 naios_out = minl(naios_out, tip->naios_out);
1092
1093 if (tip->send_wait) {
1094 tip->send_wait = 0;
1095 pthread_cond_signal(&tip->cond);
1096 }
1097 pthread_mutex_unlock(&tip->mutex);
1098
1099 /*
1100 * Short cut: If we /know/ there are some more AIOs, go handle them
1101 */
1102 if (naios_out)
1103 goto again;
1104}
1105
1106/**
1107 * replay_rec - Worker thread to reclaim AIOs
1108 * @arg: Pointer to thread information
1109 */
1110static void *replay_rec(void *arg)
1111{
1112 long naios_out;
1113 struct thr_info *tip = arg;
1114
1115 while ((naios_out = reap_wait_aios(tip)) > 0)
1116 reclaim_ios(tip, naios_out);
1117
1118 assert(tip->send_done);
1119 tip->reap_done = 1;
1120 set_reclaim_done();
1121
1122 return NULL;
1123}
1124
1125/*
1126 * ========================================================================
1127 * ==== REPLAY ROUTINES ===================================================
1128 * ========================================================================
1129 */
1130
1131/**
1132 * next_bunch - Retrieve next bunch of AIOs to process
1133 * @tip: Per-thread information
1134 * @bunch: Bunch information
1135 *
1136 * Returns TRUE if we recovered a bunch of IOs, else hit EOF
1137 */
1138static int next_bunch(struct thr_info *tip, struct io_bunch *bunch)
1139{
1140 size_t count, result;
1141
1142 result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr));
1143 if (result != sizeof(bunch->hdr)) {
1144 if (result == 0)
1145 return 0;
1146
1147 fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n",
1148 (long)result);
1149 /*NOTREACHED*/
1150 }
1151 assert(bunch->hdr.npkts <= BT_MAX_PKTS);
1152
1153 count = bunch->hdr.npkts * sizeof(struct io_pkt);
1154 result = read(tip->ifd, &bunch->pkts, count);
1155 if (result != count) {
1156 fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n",
1157 (long)result, (long)count);
1158 /*NOTREACHED*/
1159 }
1160
1161 return 1;
1162}
1163
1164/**
1165 * nfree_current - Returns current number of AIOs that are free
1166 *
1167 * Will wait for available ones...
1168 *
1169 * Returns 0 if we have some condition that causes us to exit
1170 */
1171static int nfree_current(struct thr_info *tip)
1172{
1173 int nfree = 0;
1174
1175 pthread_mutex_lock(&tip->mutex);
1176 while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) {
1177 tip->send_wait = 1;
1178 if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1179 fatal("pthread_cond_wait", ERR_SYSCALL,
1180 "nfree_current cond wait failed\n");
1181 /*NOTREACHED*/
1182 }
1183 }
1184 pthread_mutex_unlock(&tip->mutex);
1185
1186 return nfree;
1187}
1188
1189/**
1190 * stall - Stall for the number of nanoseconds requested
1191 *
1192 * We may be late, in which case we just return.
1193 */
1194static void stall(struct thr_info *tip, long long oclock)
1195{
1196 struct timespec req;
1197 long long dreal, tclock = gettime() - rgenesis;
1198
4a7968cc
LU
1199 oclock /= acc_factor;
1200
d47a3fec
AB
1201 if (verbose > 1)
1202 fprintf(tip->vfp, " stall(%lld.%09lld, %lld.%09lld)\n",
1203 du64_to_sec(oclock), du64_to_nsec(oclock),
1204 du64_to_sec(tclock), du64_to_nsec(tclock));
1205
1206 while (!is_send_done(tip) && tclock < oclock) {
1207 dreal = oclock - tclock;
1208 req.tv_sec = dreal / (1000 * 1000 * 1000);
1209 req.tv_nsec = dreal % (1000 * 1000 * 1000);
1210
1211 if (verbose > 1) {
1212 fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n",
1213 (long long)req.tv_sec,
1214 (long long)req.tv_nsec);
1215 }
1216
1217 if (nanosleep(&req, NULL) < 0 && signal_done)
1218 break;
1219
1220 tclock = gettime() - rgenesis;
1221 }
1222}
1223
1224/**
1225 * iocbs_map - Map a set of AIOs onto a set of IOCBs
1226 * @tip: Per-thread information
1227 * @list: List of AIOs created
1228 * @pkts: AIOs to map
1229 * @ntodo: Number of AIOs to map
1230 */
1231static void iocbs_map(struct thr_info *tip, struct iocb **list,
1232 struct io_pkt *pkts, int ntodo)
1233{
1234 int i;
1235 struct io_pkt *pkt;
1236
1237 assert(0 < ntodo && ntodo <= naios);
1238
1239 pthread_mutex_lock(&tip->mutex);
1240 assert(ntodo <= list_len(&tip->free_iocbs));
1241 for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) {
1242 __u32 rw = pkt->rw;
1243 struct iocb_pkt *iocbp;
1244
1245 if (!pkt->rw && !write_enabled)
1246 rw = 1;
1247
1248 if (verbose > 1)
1249 fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n",
1250 (unsigned long long)pkt->sector,
1251 (unsigned long long)pkt->nbytes / nb_sec,
1252 rw ? 'R' : 'W',
1253 (rw == 1 && pkt->rw == 0) ? '!' : ' ');
1254
1255 iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head);
1256 iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec);
1257
1258 list_move_tail(&iocbp->head, &tip->used_iocbs);
1259 list[i] = &iocbp->iocb;
1260 }
1261
1262 tip->naios_free -= ntodo;
1263 assert(tip->naios_free >= 0);
1264 pthread_mutex_unlock(&tip->mutex);
1265}
1266
1267/**
1268 * process_bunch - Process a bunch of requests
1269 * @tip: Per-thread information
1270 * @bunch: Bunch to process
1271 */
1272static void process_bunch(struct thr_info *tip, struct io_bunch *bunch)
1273{
1274 __u64 i = 0;
1275 struct iocb *list[bunch->hdr.npkts];
1276
1277 assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS);
1278 while (!is_send_done(tip) && (i < bunch->hdr.npkts)) {
1279 long ndone;
1280 int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i);
1281
1282 assert(0 < ntodo && ntodo <= naios);
1283 iocbs_map(tip, list, &bunch->pkts[i], ntodo);
1284 if (!no_stalls)
1285 stall(tip, bunch->hdr.time_stamp - genesis);
1286
1287 if (ntodo) {
1288 if (verbose > 1)
1289 fprintf(tip->vfp, "submit(%d)\n", ntodo);
1290 ndone = io_submit(tip->ctx, ntodo, list);
1291 if (ndone != (long)ntodo) {
1292 fatal("io_submit", ERR_SYSCALL,
1293 "%d: io_submit(%d:%ld) failed (%s)\n",
1294 tip->cpu, ntodo, ndone,
1295 strerror(labs(ndone)));
1296 /*NOTREACHED*/
1297 }
1298
1299 pthread_mutex_lock(&tip->mutex);
1300 tip->naios_out += ndone;
1301 assert(tip->naios_out <= naios);
1302 if (tip->reap_wait) {
1303 tip->reap_wait = 0;
1304 pthread_cond_signal(&tip->cond);
1305 }
1306 pthread_mutex_unlock(&tip->mutex);
1307
1308 i += ndone;
1309 assert(i <= bunch->hdr.npkts);
1310 }
1311 }
1312}
1313
1314/**
1315 * reset_input_file - Reset the input file for the next iteration
1316 * @tip: Thread information
1317 *
1318 * We also do a dummy read of the file header to get us to the first bunch.
1319 */
1320static void reset_input_file(struct thr_info *tip)
1321{
1322 struct io_file_hdr hdr;
1323
1324 lseek(tip->ifd, 0, 0);
1325
1326 if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
1327 fatal(tip->file_name, ERR_ARGS, "Header reread failed\n");
1328 /*NOTREACHED*/
1329 }
1330}
1331
1332/**
1333 * replay_sub - Worker thread to submit AIOs that are being replayed
1334 */
1335static void *replay_sub(void *arg)
1336{
a788dfde 1337 unsigned int i;
e41bf0ff 1338 char *mdev;
d47a3fec
AB
1339 char path[MAXPATHLEN];
1340 struct io_bunch bunch;
1341 struct thr_info *tip = arg;
358504bb 1342 int oflags;
d47a3fec
AB
1343
1344 pin_to_cpu(tip);
1345
e41bf0ff
ES
1346 mdev = map_dev(tip->devnm);
1347 sprintf(path, "/dev/%s", mdev);
1348 /*
1349 * convert underscores to slashes to
1350 * restore device names that have larger paths
1351 */
1352 for (i = 0; i < strlen(mdev); i++)
1353 if (path[strlen("/dev/") + i] == '_')
1354 path[strlen("/dev/") + i] = '/';
358504bb
JA
1355#ifdef O_NOATIME
1356 oflags = O_NOATIME;
1357#else
1358 oflags = 0;
1359#endif
1360 tip->ofd = open(path, O_RDWR | O_DIRECT | oflags);
d47a3fec
AB
1361 if (tip->ofd < 0) {
1362 fatal(path, ERR_SYSCALL, "Failed device open\n");
1363 /*NOTREACHED*/
1364 }
1365
1366 set_replay_ready();
1367 while (!is_send_done(tip) && tip->iterations--) {
1368 wait_iter_start();
cbb3e69e 1369 if (verbose > 1)
d47a3fec
AB
1370 fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations);
1371 while (!is_send_done(tip) && next_bunch(tip, &bunch))
1372 process_bunch(tip, &bunch);
1373 set_iter_done();
1374 reset_input_file(tip);
1375 }
1376 tip->send_done = 1;
1377 set_replay_done();
1378
1379 return NULL;
1380}
1381
1382/*
1383 * ========================================================================
1384 * ==== COMMAND LINE ARGUMENT HANDLING ====================================
1385 * ========================================================================
1386 */
1387
/*
 * Option summary printed as part of the usage message. Options that take
 * an argument show it in both the short and the long form.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus> ] Default: 1\n"
	"\t[ -d <dir> : --input-directory=<dir> ] Default: .\n"
	"\t[ -F : --find-records ] Default: Off\n"
	"\t[ -h : --help ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base> ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters> ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file> ] Default: None\n"
	"\t[ -N : --no-stalls ] Default: Off\n"
	"\t[ -x <factor>: --acc-factor=<factor> ] Default: 1\n"
	"\t[ -v : --verbose ] Default: Off\n"
	"\t[ -V : --version ] Default: Off\n"
	"\t[ -W : --write-enable ] Default: Off\n"
	"\t<dev...> Default: None\n"
	"\n";
1404
/*
 * Short option string for getopt_long(); a trailing ':' marks options that
 * take an argument. Every letter here has a matching long option below.
 */
#define S_OPTS	"c:d:Fhi:I:M:Nx:vVW"

/* Long options; each one maps onto the short option letter in .val */
static struct option l_opts[] = {
	{
		.name = "cpus",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'c'
	},
	{
		.name = "input-directory",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'd'
	},
	{
		.name = "find-records",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'F'
	},
	{
		.name = "help",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'h'
	},
	{
		.name = "input-base",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'i'
	},
	{
		.name = "iterations",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'I'
	},
	{
		.name = "map-devs",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'M'
	},
	{
		.name = "no-stalls",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'N'
	},
	{
		.name = "acc-factor",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'x'
	},
	{
		.name = "verbose",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'v'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'V'
	},
	{
		.name = "write-enable",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'W'
	},
	{
		/* Terminator required by getopt_long() */
		.name = NULL
	}
};
1483
1484/**
1485 * handle_args: Parse passed in argument list
1486 * @argc: Number of arguments in argv
1487 * @argv: Arguments passed in
1488 *
1489 * Does rudimentary parameter verification as well.
1490 */
1491static void handle_args(int argc, char *argv[])
1492{
1493 int c;
4a7968cc 1494 int r;
d47a3fec
AB
1495
1496 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1497 switch (c) {
1498 case 'c':
1499 cpus_to_use = atoi(optarg);
1500 if (cpus_to_use <= 0 || cpus_to_use > ncpus) {
1501 fatal(NULL, ERR_ARGS,
1502 "Invalid number of cpus %d (0<x<%d)\n",
1503 cpus_to_use, ncpus);
1504 /*NOTREACHED*/
1505 }
1506 break;
1507
1508 case 'd':
1509 idir = optarg;
1510 if (access(idir, R_OK | X_OK) != 0) {
1511 fatal(idir, ERR_ARGS,
1512 "Invalid input directory specified\n");
1513 /*NOTREACHED*/
1514 }
1515 break;
1516
1517 case 'F':
1518 find_records = 1;
1519 break;
1520
1521 case 'h':
1522 usage();
1523 exit(0);
1524 /*NOTREACHED*/
1525
1526 case 'i':
1527 ibase = optarg;
1528 break;
1529
1530 case 'I':
1531 def_iterations = atoi(optarg);
1532 if (def_iterations <= 0) {
1533 fprintf(stderr,
1534 "Invalid number of iterations %d\n",
1535 def_iterations);
1536 exit(ERR_ARGS);
1537 /*NOTREACHED*/
1538 }
1539 break;
1540
1541 case 'M':
1542 read_map_devs(optarg);
1543 break;
1544
1545 case 'N':
1546 no_stalls = 1;
1547 break;
1548
4a7968cc
LU
1549 case 'x':
1550 r = sscanf(optarg,"%u",&acc_factor);
1551 if (r!=1) {
1552 fprintf(stderr,
1553 "Invalid acceleration factor\n");
1554 exit(ERR_ARGS);
1555 /*NOTREACHED*/
1556 }
1557 break;
1558
d47a3fec
AB
1559 case 'V':
1560 fprintf(stderr, "btreplay -- version %s\n",
1561 my_btversion);
1562 fprintf(stderr, " Built on %s\n",
1563 build_date);
1564 exit(0);
1565 /*NOTREACHED*/
1566
1567 case 'v':
1568 verbose++;
1569 break;
1570
1571 case 'W':
1572 write_enabled = 1;
1573 break;
1574
1575 default:
1576 usage();
1577 fatal(NULL, ERR_ARGS,
1578 "Invalid command line argument %c\n", c);
1579 /*NOTREACHED*/
1580 }
1581 }
1582
1583 while (optind < argc)
1584 add_input_dev(argv[optind++]);
1585
1586 if (find_records)
1587 find_input_devs(idir);
1588
1589 if (list_len(&input_devs) == 0) {
1590 fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n");
1591 /*NOTREACHED*/
1592 }
1593
1594 if (cpus_to_use < 0)
1595 cpus_to_use = ncpus;
1596}
1597
1598/*
1599 * ========================================================================
1600 * ==== MAIN ROUTINE ======================================================
1601 * ========================================================================
1602 */
1603
1604/**
1605 * set_signal_done - Signal handler, catches signals & sets signal_done
1606 */
1607static void set_signal_done(__attribute__((__unused__))int signum)
1608{
1609 signal_done = 1;
1610}
1611
1612/**
1613 * main -
1614 * @argc: Number of arguments
1615 * @argv: Array of arguments
1616 */
1617int main(int argc, char *argv[])
1618{
1619 int i;
1620 struct list_head *p;
1621
1622 pgsize = getpagesize();
1623 assert(pgsize > 0);
1624
1625 setup_signal(SIGINT, set_signal_done);
1626 setup_signal(SIGTERM, set_signal_done);
1627
1628 get_ncpus();
1629 handle_args(argc, argv);
1630 find_input_files();
1631
1632 nfiles = list_len(&input_files);
1633 __list_for_each(p, &input_files) {
1634 tip_init(list_entry(p, struct thr_info, head));
1635 }
1636
1637 wait_replays_ready();
1638 for (i = 0; i < def_iterations; i++) {
1639 rgenesis = gettime();
1640 start_iter();
1641 if (verbose)
1642 fprintf(stderr, "I");
1643 wait_iters_done();
1644 }
1645
1646 wait_replays_done();
1647 wait_reclaims_done();
1648
1649 if (verbose)
1650 fprintf(stderr, "\n");
1651
1652 rem_input_files();
1653 release_map_devs();
1654
1655 return 0;
1656}