Add btrecord/btreplay capability
[blktrace.git] / btreplay / btreplay.c
1 /*
2  * Blktrace replay utility - Play traces back
3  *
4  * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20
/* Compile date and time of this file, taken from __DATE__ and __TIME__ */
static char build_date[] = __DATE__ " at " __TIME__;
22
23 #include <assert.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <libaio.h>
27 #include <pthread.h>
28 #include <sched.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/time.h>
38 #include <sys/types.h>
39 #include <dirent.h>
40
41 #if !defined(_GNU_SOURCE)
42 #       define _GNU_SOURCE
43 #endif
44 #include <getopt.h>
45
46 #include "list.h"
47 #include "btrecord.h"
48
49 /* 
50  * ========================================================================
51  * ==== STRUCTURE DEFINITIONS =============================================
52  * ========================================================================
53  */
54
55 /**
56  * Each device map has one of these:
57  * 
58  * @head:       Linked on to map_devs
59  * @from_dev:   Device name as seen on recorded system
60  * @to_dev:     Device name to be used on replay system
61  */
62 struct map_dev {
63         struct list_head head;
64         char *from_dev, *to_dev;
65 };
66
67 /**
68  * Each device name specified has one of these (until threads are created)
69  *
70  * @head:       Linked onto input_devs
71  * @devnm:      Device name -- 'sd*'
72  */
73 struct dev_info {
74         struct list_head head;
75         char *devnm;
76 };
77
78 /*
79  * Per input file information
80  *
81  * @head:       Used to link up on input_files
82  * @free_iocbs: List of free iocb's available for use
83  * @used_iocbs: List of iocb's currently outstanding
84  * @mutex:      Mutex used with condition variable to protect volatile values
85  * @cond:       Condition variable used when waiting on a volatile value change
86  * @naios_out:  Current number of AIOs outstanding on this context
87  * @naios_free: Number of AIOs on the free list (short cut for list_len)
88  * @send_wait:  Boolean: When true, the sub thread is waiting on free IOCBs
89  * @reap_wait:  Boolean: When true, the rec thread is waiting on used IOCBs
90  * @send_done:  Boolean: When true, the sub thread has completed work
91  * @reap_done:  Boolean: When true, the rec thread has completed work
92  * @sub_thread: Thread used to submit IOs.
93  * @rec_thread: Thread used to reclaim IOs.
94  * @ctx:        IO context
95  * @devnm:      Copy of the device name being managed by this thread
96  * @file_name:  Full name of the input file
97  * @cpu:        CPU this thread is pinned to
98  * @ifd:        Input file descriptor
99  * @ofd:        Output file descriptor
100  * @iterations: Remaining iterations to process
101  * @vfp:        For verbose dumping of actions performed
102  */
103 struct thr_info {
104         struct list_head head, free_iocbs, used_iocbs;
105         pthread_mutex_t mutex;
106         pthread_cond_t cond;
107         volatile long naios_out, naios_free;
108         volatile int send_wait, reap_wait, send_done, reap_done;
109         pthread_t sub_thread, rec_thread;
110         io_context_t ctx;
111         char *devnm, *file_name;
112         int cpu, ifd, ofd, iterations;
113         FILE *vfp;
114 };
115
116 /*
117  * Every Asynchronous IO used has one of these (naios per file/device).
118  *
119  * @iocb:       IOCB sent down via io_submit
120  * @head:       Linked onto file_list.free_iocbs or file_list.used_iocbs
121  * @tip:        Pointer to per-thread information this IO is associated with
122  * @nbytes:     Number of bytes in buffer associated with iocb
123  */
124 struct iocb_pkt {
125         struct iocb iocb;
126         struct list_head head;
127         struct thr_info *tip;
128         int nbytes;
129 };
130
131 /* 
132  * ========================================================================
133  * ==== GLOBAL VARIABLES ==================================================
134  * ========================================================================
135  */
136
137 static volatile int signal_done = 0;    // Boolean: Signal'ed, need to quit
138
139 static char *ibase = "replay";          // Input base name
140 static char *idir = ".";                // Input directory base
141 static int cpus_to_use = -1;            // Number of CPUs to use
142 static int def_iterations = 1;          // Default number of iterations
143 static int naios = 512;                 // Number of AIOs per thread
144 static int ncpus = 0;                   // Number of CPUs in the system
145 static int verbose = 0;                 // Boolean: Output some extra info
146 static int write_enabled = 0;           // Boolean: Enable writing
147 static __u64 genesis = ~0;              // Earliest time seen
148 static __u64 rgenesis;                  // Our start time
149 static size_t pgsize;                   // System Page size
150 static int nb_sec = 512;                // Number of bytes per sector
151 static LIST_HEAD(input_devs);           // List of devices to handle
152 static LIST_HEAD(input_files);          // List of input files to handle
153 static LIST_HEAD(map_devs);             // List of device maps
154 static int nfiles = 0;                  // Number of files to handle
155 static int no_stalls = 0;               // Boolean: Disable pre-stalls
156 static int find_records = 0;            // Boolean: Find record files auto
157
/*
 * Counters that are only changed under their mutex / condition variable pair.
 */

/* n_reclaims_done: number of reclaim threads that have completed */
static volatile int n_reclaims_done = 0;
static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;

/* n_replays_done: number of replay threads that have completed */
static volatile int n_replays_done = 0;
static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;

/* n_replays_ready: number of replay threads that are ready to start */
static volatile int n_replays_ready = 0;
static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;

/* n_iters_done: number of replay threads that finished one iteration */
static volatile int n_iters_done = 0;
static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;

/* iter_start: starts an iteration for the replay threads */
static volatile int iter_start = 0;
static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
186
187 /* 
188  * ========================================================================
189  * ==== FORWARD REFERENCES ================================================
190  * ========================================================================
191  */
192
/* Thread start routines handed to pthread_create() by tip_init() */
static void *replay_sub(void *arg);
static void *replay_rec(void *arg);

/* Text printed by usage() after the version line */
static char usage_str[];
196
197 /* 
198  * ========================================================================
199  * ==== INLINE ROUTINES ===================================================
200  * ========================================================================
201  */
202
/*
 * Exit values used with fatal():
 */
#define ERR_ARGS                        1
#define ERR_SYSCALL                     2

/*
 * fatal - Report an unrecoverable error and exit
 * @errstring:  When non-NULL it is handed to perror() first
 * @exitval:    Value passed to exit()
 * @...:        printf-style format string and arguments, written to stderr
 */
#define fatal(errstring, exitval, ...)                                  \
        do {                                                            \
                if (errstring)                                          \
                        perror(errstring);                              \
                fprintf(stderr, __VA_ARGS__);                           \
                exit(exitval);                                          \
                /*NOTREACHED*/                                          \
        } while (0)
217
/**
 * du64_to_sec - Whole-seconds part of a value, taking 10^9 units per second
 * @du64: Value to convert
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
        const long long unsigned value = (long long unsigned)du64;

        return value / (1000 * 1000 * 1000);
}
222
/**
 * du64_to_nsec - Sub-second remainder of a value (modulo 10^9)
 * @du64: Value to convert; it is viewed as signed and its magnitude is used
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
        const long long magnitude = llabs((long long)du64);

        return magnitude % (1000 * 1000 * 1000);
}
227
/**
 * min - Return minimum of two integers
 * @a: First value
 * @b: Second value
 *
 * Returns @a when it is strictly smaller than @b, otherwise @b.
 */
static inline int min(int a, int b)
{
        return a < b ? a : b;
}
235
/**
 * minl - Return minimum of two longs
 * @a: First value
 * @b: Second value
 *
 * Returns @a when it is strictly smaller than @b, otherwise @b.
 */
static inline long minl(long a, long b)
{
        return a < b ? a : b;
}
243
244 /**
245  * usage - Display usage string and version
246  */
247 static inline void usage(void)
248 {
249         fprintf(stderr, "Usage: btreplay -- version %s\n%s", 
250                 my_btversion, usage_str);
251 }
252
253 /**
254  * is_send_done - Returns true if sender should quit early
255  * @tip: Per-thread information
256  */
257 static inline int is_send_done(struct thr_info *tip)
258 {
259         return signal_done || tip->send_done;
260 }
261
262 /**
263  * is_reap_done - Returns true if reaper should quit early
264  * @tip: Per-thread information
265  */
266 static inline int is_reap_done(struct thr_info *tip)
267 {
268         return tip->send_done && tip->naios_out == 0;
269 }
270
/**
 * NS_TICKS - Number of nanoseconds in one second
 */
#define NS_TICKS                ((__u64)1000 * (__u64)1000 * (__u64)1000)

/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts: Time as seconds plus nanoseconds
 *
 * Returns tv_sec scaled to nanoseconds plus tv_nsec.
 */
static inline __u64 ts2ns(struct timespec *ts)
{
        return ((__u64)(ts->tv_sec) * NS_TICKS) + (__u64)(ts->tv_nsec);
}

/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time as seconds plus microseconds
 *
 * Returns tv_sec scaled to nanoseconds plus tv_usec scaled to nanoseconds.
 * The seconds must be multiplied by NS_TICKS; adding them unscaled would
 * mix seconds with nanoseconds.
 */
static inline __u64 tv2ns(struct timeval *tp)
{
        return ((__u64)(tp->tv_sec) * NS_TICKS) +
               ((__u64)(tp->tv_usec) * (__u64)1000);
}
287
288 /**
289  * touch_memory - Force physical memory to be allocating it
290  * 
291  * For malloc()ed memory we need to /touch/ it to make it really
292  * exist. Otherwise, for write's (to storage) things may not work
293  * as planned - we see Linux just use a single area to /read/ from
294  * (as there isn't any memory that has been associated with the 
295  * allocated virtual addresses yet).
296  */
297 static inline void touch_memory(char *buf, size_t bsize)
298 {
299 #if defined(PREP_BUFS)
300         memset(buf, 0, bsize);
301 #else
302         size_t i;
303
304         for (i = 0; i < bsize; i += pgsize)
305                 buf[i] = 0;
306 #endif
307 }
308
309 /**
310  * buf_alloc - Returns a page-aligned buffer of the specified size
311  * @nbytes: Number of bytes to allocate
312  */
313 static inline void *buf_alloc(size_t nbytes)
314 {
315         void *buf;
316
317         if (posix_memalign(&buf, pgsize, nbytes)) {
318                 fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n");
319                 /*NOTREACHED*/
320         }
321
322         return buf;
323 }
324
325 /**
326  * gettime - Returns current time 
327  */
328 static inline __u64 gettime(void)
329 {
330         static int use_clock_gettime = -1;              // Which clock to use
331
332         if (use_clock_gettime < 0) {
333                 use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0;
334                 if (use_clock_gettime) {
335                         struct timespec ts = {
336                                 .tv_sec = 0,
337                                 .tv_nsec = 0
338                         };
339                         clock_settime(CLOCK_MONOTONIC, &ts);
340                 }
341         }
342
343         if (use_clock_gettime) {
344                 struct timespec ts;
345                 clock_gettime(CLOCK_MONOTONIC, &ts);
346                 return ts2ns(&ts);
347         }
348         else {
349                 struct timeval tp;
350                 gettimeofday(&tp, NULL);
351                 return tv2ns(&tp);
352         }
353 }
354
355 /**
356  * setup_signal - Set up a signal handler for the specified signum
357  */
358 static inline void setup_signal(int signum, sighandler_t handler)
359 {
360         if (signal(signum, handler) == SIG_ERR) {
361                 fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n",
362                         signum);
363                 /*NOTREACHED*/
364         }
365 }
366
367 /* 
368  * ========================================================================
369  * ==== CONDITION VARIABLE ROUTINES =======================================
370  * ========================================================================
371  */
372
/**
 * __set_cv - Bump a counter that is guarded by a condition variable
 * @pmp:        Mutex guarding the counter
 * @pcp:        Condition variable signalled after the update
 * @vp:         Counter to increment by one
 * @mxv:        Upper bound for the counter (only checked when asserts are on)
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
                            volatile int *vp,
                            __attribute__((__unused__))int mxv)
{
        pthread_mutex_lock(pmp);

        assert(*vp < mxv);
        *vp = *vp + 1;

        pthread_cond_signal(pcp);
        pthread_mutex_unlock(pmp);
}
390
/**
 * __wait_cv - Block until a guarded counter reaches a value, then reset it
 * @pmp:        Mutex guarding the counter
 * @pcp:        Condition variable to wait on
 * @vp:         Counter being watched
 * @mxv:        Value to wait for
 *
 * Returns once *@vp is at least @mxv; the counter is set back to zero
 * before the mutex is dropped.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
                             volatile int *vp, int mxv)
{
        pthread_mutex_lock(pmp);

        for (;;) {
                if (*vp >= mxv)
                        break;
                pthread_cond_wait(pcp, pmp);
        }

        *vp = 0;
        pthread_mutex_unlock(pmp);
}
407
408 static inline void set_reclaim_done(void)
409 {
410         __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
411                  nfiles);
412 }
413
414 static inline void wait_reclaims_done(void)
415 {
416         __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done,
417                   nfiles);
418 }
419
420 static inline void set_replay_ready(void)
421 {
422         __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
423                  nfiles);
424 }
425
426 static inline void wait_replays_ready(void)
427 {
428         __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready,
429                   nfiles);
430 }
431
432 static inline void set_replay_done(void)
433 {
434         __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
435                 nfiles);
436 }
437
438 static inline void wait_replays_done(void)
439 {
440         __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done,
441                   nfiles);
442 }
443
444 static inline void set_iter_done(void)
445 {
446         __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
447                 nfiles);
448 }
449
450 static inline void wait_iters_done(void)
451 {
452         __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done,
453                   nfiles);
454 }
455
456 /**
457  * wait_iter_start - Wait for an iteration to start 
458  * 
459  * This is /slightly/ different: we are waiting for a value to become
460  * non-zero, and then we decrement it and go on. 
461  */
462 static inline void wait_iter_start(void)
463 {
464         pthread_mutex_lock(&iter_start_mutex);
465         while (iter_start == 0)
466                 pthread_cond_wait(&iter_start_cond, &iter_start_mutex);
467         assert(1 <= iter_start && iter_start <= nfiles);
468         iter_start--;
469         pthread_mutex_unlock(&iter_start_mutex);
470 }
471
472 /**
473  * start_iter - Start an iteration at the replay thread level
474  */
475 static inline void start_iter(void)
476 {
477         pthread_mutex_lock(&iter_start_mutex);
478         assert(iter_start == 0);
479         iter_start = nfiles;
480         pthread_cond_broadcast(&iter_start_cond);
481         pthread_mutex_unlock(&iter_start_mutex);
482 }
483
484 /* 
485  * ========================================================================
486  * ==== CPU RELATED ROUTINES ==============================================
487  * ========================================================================
488  */
489
490 /**
491  * get_ncpus - Sets up the global 'ncpus' value
492  */
493 static void get_ncpus(void)
494 {
495         cpu_set_t cpus;
496
497         if (sched_getaffinity(getpid(), sizeof(cpus), &cpus)) {
498                 fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n");
499                 /*NOTREACHED*/
500         }
501
502         /*
503          * XXX This assumes (perhaps wrongly) that there are no /holes/ 
504          * XXX in the mask.
505          */
506         for (ncpus = 0; ncpus < CPU_SETSIZE && CPU_ISSET(ncpus, &cpus); ncpus++)
507                 ;
508         if (ncpus == 0) {
509                 fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n");
510                 /*NOTREACHED*/
511         }
512 }
513
514 /**
515  * pin_to_cpu - Pin this thread to a specific CPU
516  * @tip: Thread information
517  */
518 static void pin_to_cpu(struct thr_info *tip)
519 {
520         cpu_set_t cpus;
521
522         assert(0 <= tip->cpu && tip->cpu < ncpus);
523
524         CPU_ZERO(&cpus);
525         CPU_SET(tip->cpu, &cpus);
526         if (sched_setaffinity(getpid(), sizeof(cpus), &cpus)) {
527                 fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n");
528                 /*NOTREACHED*/
529         }
530
531         if (verbose > 1) {
532                 int i;
533                 cpu_set_t now;
534
535                 (void)sched_getaffinity(getpid(), sizeof(now), &now);
536                 fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu);
537                 for (i = 0; i < ncpus; i++)
538                         fprintf(tip->vfp, "%1d", CPU_ISSET(i, &now));
539                 fprintf(tip->vfp, "\n");
540         }
541 }
542
543 /* 
544  * ========================================================================
545  * ==== INPUT DEVICE HANDLERS =============================================
546  * ========================================================================
547  */
548
549 /**
550  * add_input_dev - Add a device ('sd*') to the list of devices to handle
551  */
552 static void add_input_dev(char *devnm)
553 {
554         struct list_head *p;
555         struct dev_info *dip;
556
557         __list_for_each(p, &input_devs) {
558                 dip = list_entry(p, struct dev_info, head);
559                 if (strcmp(dip->devnm, devnm) == 0)
560                         return;
561         }
562
563         dip = malloc(sizeof(*dip));
564         dip->devnm = strdup(devnm);
565         list_add_tail(&dip->head, &input_devs);
566 }
567
568 /**
569  * rem_input_dev - Remove resources associated with this device
570  */
571 static void rem_input_dev(struct dev_info *dip)
572 {
573         list_del(&dip->head);
574         free(dip->devnm);
575         free(dip);
576 }
577
578 static void find_input_devs(char *idir)
579 {
580         struct dirent *ent;
581         DIR *dir = opendir(idir);
582
583         if (dir == NULL) {
584                 fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
585                 /*NOTREACHED*/
586         }
587
588         while ((ent = readdir(dir)) != NULL) {
589                 char *p, *dsf = malloc(256);
590
591                 if (strstr(ent->d_name, ".replay.") == NULL)
592                         continue;
593
594                 dsf = strdup(ent->d_name);
595                 p = index(dsf, '.');
596                 assert(p != NULL);
597                 *p = '\0';
598                 add_input_dev(dsf);
599                 free(dsf);
600         }
601
602         closedir(dir);
603 }
604
605 /* 
606  * ========================================================================
607  * ==== MAP DEVICE INTERFACES =============================================
608  * ========================================================================
609  */
610
611 /**
612  * read_map_devs - Read in a set of device mapping from the provided file.
613  * @file_name:  File containing device maps
614  *
615  * We support the notion of multiple such files being specifed on the cmd line
616  */
617 static void read_map_devs(char *file_name)
618 {
619         FILE *fp;
620         char *from_dev, *to_dev;
621
622         fp = fopen(file_name, "r");
623         if (!fp) {
624                 fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n");
625                 /*NOTREACHED*/
626         }
627
628         while (fscanf(fp, "%as %as", &from_dev, &to_dev) == 2) {
629                 struct map_dev *mdp = malloc(sizeof(*mdp));
630
631                 mdp->from_dev = from_dev;
632                 mdp->to_dev = to_dev;
633                 list_add_tail(&mdp->head, &map_devs);
634         }
635
636         fclose(fp);
637 }
638
639 /**
640  * release_map_devs - Release resources associated with device mappings.
641  */
642 static void release_map_devs(void)
643 {
644         struct list_head *p, *q;
645
646         list_for_each_safe(p, q, &map_devs) {
647                 struct map_dev *mdp = list_entry(p, struct map_dev, head);
648
649                 list_del(&mdp->head);
650
651                 free(mdp->from_dev);
652                 free(mdp->to_dev);
653                 free(mdp);
654         }
655 }
656
657 /**
658  * map_dev - Return the mapped device for that specified
659  * @from_dev:   Device name as seen on recorded system
660  *
661  * Note: If there is no such mapping, we return the same name.
662  */
663 static char *map_dev(char *from_dev)
664 {
665         struct list_head *p;
666
667         __list_for_each(p, &map_devs) {
668                 struct map_dev *mdp = list_entry(p, struct map_dev, head);
669
670                 if (strcmp(from_dev, mdp->from_dev) == 0)
671                         return mdp->to_dev;
672         }
673
674         return from_dev;
675 }
676
677 /* 
678  * ========================================================================
679  * ==== IOCB MANAGEMENT ROUTINES ==========================================
680  * ========================================================================
681  */
682
683 /**
684  * iocb_init - Initialize the fields of an IOCB
685  * @tip: Per-thread information
686  * iocbp: IOCB pointer to update
687  */
688 static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp)
689 {
690         iocbp->tip = tip;
691         iocbp->nbytes = 0;
692         iocbp->iocb.u.c.buf = NULL;
693 }
694
695 /**
696  * iocb_setup - Set up an iocb with this AIOs information
697  * @iocbp: IOCB pointer to update
698  * @rw: Direction (0 == write, 1 == read)
699  * @n: Number of bytes to transfer
700  * @off: Offset (in bytes)
701  */
702 static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off)
703 {
704         char *buf;
705         struct iocb *iop = &iocbp->iocb;
706
707         assert(rw == 0 || rw == 1);
708         assert(0 < n && (n % nb_sec) == 0);
709         assert(0 <= off);
710
711         if (iocbp->nbytes) {
712                 if (iocbp->nbytes >= n) {
713                         buf = iop->u.c.buf;
714                         goto prep;
715                 }
716
717                 assert(iop->u.c.buf);
718                 free(iop->u.c.buf);
719         }
720
721         buf = buf_alloc(n);
722         iocbp->nbytes = n;
723
724 prep:
725         if (rw)
726                 io_prep_pread(iop, iocbp->tip->ofd, buf, n, off);
727         else {
728                 assert(write_enabled);
729                 io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off);
730                 touch_memory(buf, n);
731         }
732
733         iop->data = iocbp;
734 }
735
736 /* 
737  * ========================================================================
738  * ==== PER-THREAD SET UP & TEAR DOWN =====================================
739  * ========================================================================
740  */
741
742 /**
743  * tip_init - Per thread initialization function
744  */
745 static void tip_init(struct thr_info *tip)
746 {
747         int i;
748
749         INIT_LIST_HEAD(&tip->free_iocbs);
750         INIT_LIST_HEAD(&tip->used_iocbs);
751
752         pthread_mutex_init(&tip->mutex, NULL);
753         pthread_cond_init(&tip->cond, NULL);
754
755         if (io_setup(naios, &tip->ctx)) {
756                 fatal("io_setup", ERR_SYSCALL, "io_setup failed\n");
757                 /*NOTREACHED*/
758         }
759
760         tip->ofd = -1;
761         tip->naios_out = 0;
762         tip->send_done = tip->reap_done = 0;
763         tip->send_wait = tip->reap_wait = 0;
764
765         memset(&tip->sub_thread, 0, sizeof(tip->sub_thread));
766         memset(&tip->rec_thread, 0, sizeof(tip->rec_thread));
767
768         for (i = 0; i < naios; i++) {
769                 struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp));
770
771                 iocb_init(tip, iocbp);
772                 list_add_tail(&iocbp->head, &tip->free_iocbs);
773         }
774         tip->naios_free = naios;
775
776         if (verbose > 1) {
777                 char fn[MAXPATHLEN];
778
779                 sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase, 
780                         tip->cpu);
781                 tip->vfp = fopen(fn, "w");
782                 if (!tip->vfp) {
783                         fatal(fn, ERR_SYSCALL, "Failed to open report\n");
784                         /*NOTREACHED*/
785                 }
786
787                 setlinebuf(tip->vfp);
788         }
789
790         if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) {
791                 fatal("pthread_create", ERR_SYSCALL, 
792                         "thread create failed\n");
793                 /*NOTREACHED*/
794         }
795
796         if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) {
797                 fatal("pthread_create", ERR_SYSCALL, 
798                         "thread create failed\n");
799                 /*NOTREACHED*/
800         }
801 }
802
803 /**
804  * tip_release - Release resources associated with this thread
805  */
806 static void tip_release(struct thr_info *tip)
807 {
808         struct list_head *p, *q;
809
810         assert(tip->send_done);
811         assert(tip->reap_done);
812         assert(list_len(&tip->used_iocbs) == 0);
813         assert(tip->naios_free == naios);
814
815         if (pthread_join(tip->sub_thread, NULL)) {
816                 fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n");
817                 /*NOTREACHED*/
818         }
819         if (pthread_join(tip->rec_thread, NULL)) {
820                 fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n");
821                 /*NOTREACHED*/
822         }
823
824         io_destroy(tip->ctx);
825
826         list_splice(&tip->used_iocbs, &tip->free_iocbs);
827         list_for_each_safe(p, q, &tip->free_iocbs) {
828                 struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head);
829
830                 list_del(&iocbp->head);
831                 if (iocbp->nbytes) 
832                         free(iocbp->iocb.u.c.buf);
833                 free(iocbp);
834         }
835
836         pthread_cond_destroy(&tip->cond);
837         pthread_mutex_destroy(&tip->mutex);
838 }
839
840 /**
841  * add_input_file - Allocate and initialize per-input file structure
842  * @cpu: CPU for this file
843  * @devnm: Device name for this file
844  * @file_name: Fully qualifed input file name
845  */
846 static void add_input_file(int cpu, char *devnm, char *file_name)
847 {
848         struct stat buf;
849         struct io_file_hdr hdr;
850         struct thr_info *tip = buf_alloc(sizeof(*tip));
851         __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
852
853         assert(0 <= cpu && cpu < ncpus);
854
855         memset(&hdr, 0, sizeof(hdr));
856         memset(tip, 0, sizeof(*tip));
857         tip->cpu = cpu % cpus_to_use;
858         tip->iterations = def_iterations;
859
860         tip->ifd = open(file_name, O_RDONLY);
861         if (tip->ifd < 0) {
862                 fatal(file_name, ERR_ARGS, "Unable to open\n");
863                 /*NOTREACHED*/
864         }
865         if (fstat(tip->ifd, &buf) < 0) {
866                 fatal(file_name, ERR_SYSCALL, "fstat failed\n");
867                 /*NOTREACHED*/
868         }
869         if (buf.st_size < (off_t)sizeof(hdr)) {
870                 if (verbose)
871                         fprintf(stderr, "\t%s empty\n", file_name);
872                 goto empty_file;
873         }
874
875         if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
876                 fatal(file_name, ERR_ARGS, "Header read failed\n");
877                 /*NOTREACHED*/
878         }
879
880         if (hdr.version != my_version) {
881                 fprintf(stderr, "%llx %llx %llx %llx\n", 
882                         (long long unsigned)hdr.version,
883                         (long long unsigned)hdr.genesis,
884                         (long long unsigned)hdr.nbunches,
885                         (long long unsigned)hdr.total_pkts);
886                 fatal(NULL, ERR_ARGS, 
887                         "BT version mismatch: %lx versus my %lx\n",
888                         (long)hdr.version, (long)my_version);
889                         
890         }
891
892         if (hdr.nbunches == 0) {
893 empty_file:
894                 close(tip->ifd);
895                 free(tip);
896                 return;
897         }
898
899         if (hdr.genesis < genesis) {
900                 if (verbose > 1)
901                         fprintf(stderr, "Setting genesis to %llu.%llu\n",
902                                 du64_to_sec(hdr.genesis),
903                                 du64_to_nsec(hdr.genesis));
904                 genesis = hdr.genesis;
905         }
906
907         tip->devnm = strdup(devnm);
908         tip->file_name = strdup(file_name);
909
910         list_add_tail(&tip->head, &input_files);
911
912         if (verbose)
913                 fprintf(stderr, "Added %s %llu\n", file_name, 
914                         (long long)hdr.genesis);
915 }
916
917 /**
918  * rem_input_file - Release resources associated with an input file
919  * @tip: Per-input file information
920  */
921 static void rem_input_file(struct thr_info *tip)
922 {
923         list_del(&tip->head);
924
925         tip_release(tip);
926
927         close(tip->ofd);
928         close(tip->ifd);
929         free(tip->file_name);
930         free(tip->devnm);
931         free(tip);
932 }
933
934 /**
935  * rem_input_files - Remove all input files
936  */
937 static void rem_input_files(void)
938 {
939         struct list_head *p, *q;
940
941         list_for_each_safe(p, q, &input_files) {
942                 rem_input_file(list_entry(p, struct thr_info, head));
943         }
944 }
945
946 /**
947  * __find_input_files - Find input files associated with this device (per cpu)
948  */
949 static void __find_input_files(struct dev_info *dip)
950 {
951         int cpu = 0;
952
953         for (;;) {
954                 char full_name[MAXPATHLEN];
955
956                 sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu);
957                 if (access(full_name, R_OK) != 0)
958                         break;
959
960                 add_input_file(cpu, dip->devnm, full_name);
961                 cpu++;
962         }
963
964         if (!cpu) {
965                 fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm);
966                 /*NOTREACHED*/
967         }
968
969         rem_input_dev(dip);
970 }
971
972
973 /**
974  * find_input_files - Find input files for all devices
975  */
976 static void find_input_files(void)
977 {
978         struct list_head *p, *q;
979
980         list_for_each_safe(p, q, &input_devs) {
981                 __find_input_files(list_entry(p, struct dev_info, head));
982         }
983 }
984
985 /* 
986  * ========================================================================
987  * ==== RECLAIM ROUTINES ==================================================
988  * ========================================================================
989  */
990
991 /**
992  * reap_wait_aios - Wait for and return number of outstanding AIOs
993  *
994  * Will return 0 if we are done
995  */
996 static int reap_wait_aios(struct thr_info *tip)
997 {
998         int naios = 0;
999
1000         if (!is_reap_done(tip)) {
1001                 pthread_mutex_lock(&tip->mutex);
1002                 while (tip->naios_out == 0) {
1003                         tip->reap_wait = 1;
1004                         if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1005                                 fatal("pthread_cond_wait", ERR_SYSCALL, 
1006                                         "nfree_current cond wait failed\n");
1007                                 /*NOTREACHED*/
1008                         }
1009                 }
1010                 naios = tip->naios_out;
1011                 pthread_mutex_unlock(&tip->mutex);
1012         }
1013         assert(is_reap_done(tip) || naios > 0);
1014
1015         return is_reap_done(tip) ? 0 : naios;
1016 }
1017
1018 /**
1019  * reclaim_ios - Reclaim AIOs completed, recycle IOCBs
1020  * @tip: Per-thread information
1021  * @naios_out: Number of AIOs we have outstanding (min)
1022  */
1023 static void reclaim_ios(struct thr_info *tip, long naios_out)
1024 {
1025         long i, ndone;
1026         struct io_event *evp, events[naios_out];
1027
1028 again:
1029         assert(naios > 0);
1030         for (;;) {
1031                 ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL);
1032                 if (ndone > 0)
1033                         break;
1034
1035                 if (errno && errno != EINTR) {
1036                         fatal("io_getevents", ERR_SYSCALL, 
1037                                 "io_getevents failed\n");
1038                         /*NOTREACHED*/
1039                 }
1040         }
1041         assert(0 < ndone && ndone <= naios_out);
1042
1043         pthread_mutex_lock(&tip->mutex);
1044         for (i = 0, evp = events; i < ndone; i++, evp++) {
1045                 struct iocb_pkt *iocbp = evp->data;
1046
1047                 if (evp->res != iocbp->iocb.u.c.nbytes) {
1048                         fatal(NULL, ERR_SYSCALL,
1049                               "Event failure %ld/%ld\t(%ld + %ld)\n",
1050                               (long)evp->res, (long)evp->res2,
1051                               (long)iocbp->iocb.u.c.offset / nb_sec, 
1052                               (long)iocbp->iocb.u.c.nbytes / nb_sec);
1053                         /*NOTREACHED*/
1054                 }
1055
1056                 list_move_tail(&iocbp->head, &tip->free_iocbs);
1057         }
1058
1059         tip->naios_free += ndone;
1060         tip->naios_out -= ndone;
1061         naios_out = minl(naios_out, tip->naios_out);
1062
1063         if (tip->send_wait) {
1064                 tip->send_wait = 0;
1065                 pthread_cond_signal(&tip->cond);
1066         }
1067         pthread_mutex_unlock(&tip->mutex);
1068
1069         /*
1070          * Short cut: If we /know/ there are some more AIOs, go handle them
1071          */
1072         if (naios_out)
1073                 goto again;
1074 }
1075
1076 /**
1077  * replay_rec - Worker thread to reclaim AIOs
1078  * @arg: Pointer to thread information
1079  */
1080 static void *replay_rec(void *arg)
1081 {
1082         long naios_out;
1083         struct thr_info *tip = arg;
1084
1085         while ((naios_out = reap_wait_aios(tip)) > 0) 
1086                 reclaim_ios(tip, naios_out);
1087
1088         assert(tip->send_done);
1089         tip->reap_done = 1;
1090         set_reclaim_done();
1091
1092         return NULL;
1093 }
1094
1095 /* 
1096  * ========================================================================
1097  * ==== REPLAY ROUTINES ===================================================
1098  * ========================================================================
1099  */
1100
1101 /**
1102  * next_bunch - Retrieve next bunch of AIOs to process
1103  * @tip: Per-thread information
1104  * @bunch: Bunch information
1105  *
1106  * Returns TRUE if we recovered a bunch of IOs, else hit EOF
1107  */
1108 static int next_bunch(struct thr_info *tip, struct io_bunch *bunch)
1109 {
1110         size_t count, result;
1111         
1112         result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr));
1113         if (result != sizeof(bunch->hdr)) {
1114                 if (result == 0)
1115                         return 0;
1116
1117                 fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n", 
1118                         (long)result);
1119                 /*NOTREACHED*/
1120         }
1121         assert(bunch->hdr.npkts <= BT_MAX_PKTS);
1122
1123         count = bunch->hdr.npkts * sizeof(struct io_pkt);
1124         result = read(tip->ifd, &bunch->pkts, count);
1125         if (result != count) {
1126                 fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n", 
1127                         (long)result, (long)count);
1128                 /*NOTREACHED*/
1129         }
1130
1131         return 1;
1132 }
1133
1134 /**
1135  * nfree_current - Returns current number of AIOs that are free
1136  *
1137  * Will wait for available ones...
1138  *
1139  * Returns 0 if we have some condition that causes us to exit
1140  */
1141 static int nfree_current(struct thr_info *tip)
1142 {
1143         int nfree = 0;
1144
1145         pthread_mutex_lock(&tip->mutex);
1146         while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) {
1147                 tip->send_wait = 1;
1148                 if (pthread_cond_wait(&tip->cond, &tip->mutex)) {
1149                         fatal("pthread_cond_wait", ERR_SYSCALL, 
1150                                 "nfree_current cond wait failed\n");
1151                         /*NOTREACHED*/
1152                 }
1153         }
1154         pthread_mutex_unlock(&tip->mutex);
1155
1156         return nfree;
1157 }
1158
1159 /**
1160  * stall - Stall for the number of nanoseconds requested
1161  *
1162  * We may be late, in which case we just return.
1163  */
1164 static void stall(struct thr_info *tip, long long oclock)
1165 {
1166         struct timespec req;
1167         long long dreal, tclock = gettime() - rgenesis;
1168
1169         if (verbose > 1)
1170                 fprintf(tip->vfp, "   stall(%lld.%09lld, %lld.%09lld)\n",
1171                         du64_to_sec(oclock), du64_to_nsec(oclock),
1172                         du64_to_sec(tclock), du64_to_nsec(tclock));
1173
1174         while (!is_send_done(tip) && tclock < oclock) {
1175                 dreal = oclock - tclock;
1176                 req.tv_sec = dreal / (1000 * 1000 * 1000);
1177                 req.tv_nsec = dreal % (1000 * 1000 * 1000);
1178
1179                 if (verbose > 1) {
1180                         fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n",
1181                                 (long long)req.tv_sec,
1182                                 (long long)req.tv_nsec);
1183                 }
1184
1185                 if (nanosleep(&req, NULL) < 0 && signal_done)
1186                         break;
1187
1188                 tclock = gettime() - rgenesis;
1189         }
1190 }
1191
1192 /**
1193  * iocbs_map - Map a set of AIOs onto a set of IOCBs
1194  * @tip: Per-thread information
1195  * @list: List of AIOs created
1196  * @pkts: AIOs to map
1197  * @ntodo: Number of AIOs to map
1198  */
1199 static void iocbs_map(struct thr_info *tip, struct iocb **list, 
1200                                              struct io_pkt *pkts, int ntodo)
1201 {
1202         int i;
1203         struct io_pkt *pkt;
1204
1205         assert(0 < ntodo && ntodo <= naios);
1206
1207         pthread_mutex_lock(&tip->mutex);
1208         assert(ntodo <= list_len(&tip->free_iocbs));
1209         for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) {
1210                 __u32 rw = pkt->rw;
1211                 struct iocb_pkt *iocbp;
1212
1213                 if (!pkt->rw && !write_enabled)
1214                         rw = 1;
1215
1216                 if (verbose > 1)
1217                         fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n",
1218                                 (unsigned long long)pkt->sector, 
1219                                 (unsigned long long)pkt->nbytes / nb_sec,
1220                                 rw ? 'R' : 'W', 
1221                                 (rw == 1 && pkt->rw == 0) ? '!' : ' ');
1222                 
1223                 iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head);
1224                 iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec);
1225
1226                 list_move_tail(&iocbp->head, &tip->used_iocbs);
1227                 list[i] = &iocbp->iocb;
1228         }
1229
1230         tip->naios_free -= ntodo;
1231         assert(tip->naios_free >= 0);
1232         pthread_mutex_unlock(&tip->mutex);
1233 }
1234
1235 /**
1236  * process_bunch - Process a bunch of requests
1237  * @tip: Per-thread information
1238  * @bunch: Bunch to process
1239  */
1240 static void process_bunch(struct thr_info *tip, struct io_bunch *bunch)
1241 {
1242         __u64 i = 0;
1243         struct iocb *list[bunch->hdr.npkts];
1244
1245         assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS);
1246         while (!is_send_done(tip) && (i < bunch->hdr.npkts)) {
1247                 long ndone;
1248                 int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i);
1249
1250                 assert(0 < ntodo && ntodo <= naios);
1251                 iocbs_map(tip, list, &bunch->pkts[i], ntodo);
1252                 if (!no_stalls)
1253                         stall(tip, bunch->hdr.time_stamp - genesis);
1254
1255                 if (ntodo) {
1256                         if (verbose > 1)
1257                                 fprintf(tip->vfp, "submit(%d)\n", ntodo);
1258                         ndone = io_submit(tip->ctx, ntodo, list);
1259                         if (ndone != (long)ntodo) {
1260                                 fatal("io_submit", ERR_SYSCALL,
1261                                         "%d: io_submit(%d:%ld) failed (%s)\n", 
1262                                         tip->cpu, ntodo, ndone, 
1263                                         strerror(labs(ndone)));
1264                                 /*NOTREACHED*/
1265                         }
1266
1267                         pthread_mutex_lock(&tip->mutex);
1268                         tip->naios_out += ndone;
1269                         assert(tip->naios_out <= naios);
1270                         if (tip->reap_wait) {
1271                                 tip->reap_wait = 0;
1272                                 pthread_cond_signal(&tip->cond);
1273                         }
1274                         pthread_mutex_unlock(&tip->mutex);
1275
1276                         i += ndone;
1277                         assert(i <= bunch->hdr.npkts);
1278                 }
1279         }
1280 }
1281
1282 /**
1283  * reset_input_file - Reset the input file for the next iteration
1284  * @tip: Thread information
1285  *
1286  * We also do a dummy read of the file header to get us to the first bunch.
1287  */
1288 static void reset_input_file(struct thr_info *tip)
1289 {
1290         struct io_file_hdr hdr;
1291
1292         lseek(tip->ifd, 0, 0);
1293
1294         if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
1295                 fatal(tip->file_name, ERR_ARGS, "Header reread failed\n");
1296                 /*NOTREACHED*/
1297         }
1298 }
1299
1300 /**
1301  * replay_sub - Worker thread to submit AIOs that are being replayed
1302  */
1303 static void *replay_sub(void *arg)
1304 {
1305         char path[MAXPATHLEN];
1306         struct io_bunch bunch;
1307         struct thr_info *tip = arg;
1308
1309         pin_to_cpu(tip);
1310
1311         sprintf(path, "/dev/%s", map_dev(tip->devnm));
1312         tip->ofd = open(path, O_RDWR | O_DIRECT);
1313         if (tip->ofd < 0) {
1314                 fatal(path, ERR_SYSCALL, "Failed device open\n");
1315                 /*NOTREACHED*/
1316         }
1317
1318         set_replay_ready();
1319         while (!is_send_done(tip) && tip->iterations--) {
1320                 wait_iter_start();
1321                 if (verbose)
1322                         fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations);
1323                 while (!is_send_done(tip) && next_bunch(tip, &bunch))
1324                         process_bunch(tip, &bunch);
1325                 set_iter_done();
1326                 reset_input_file(tip);
1327         }
1328         tip->send_done = 1;
1329         set_replay_done();
1330
1331         return NULL;
1332 }
1333
1334 /* 
1335  * ========================================================================
1336  * ==== COMMAND LINE ARGUMENT HANDLING ====================================
1337  * ========================================================================
1338  */
1339
/*
 * Option summary printed as part of the usage message. Adjacent string
 * literals are concatenated by the compiler, so no line continuations are
 * needed.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus>           ] Default: 1\n"
	"\t[ -d <dir>  : --input-directory=<dir> ] Default: .\n"
	"\t[ -F        : --find-records          ] Default: Off\n"
	"\t[ -h        : --help                  ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base>     ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters>    ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file>       ] Default: None\n"
	"\t[ -N        : --no-stalls             ] Default: Off\n"
	"\t[ -v        : --verbose               ] Default: Off\n"
	"\t[ -V        : --version               ] Default: Off\n"
	"\t[ -W        : --write-enable          ] Default: Off\n"
	"\t<dev...>                                Default: None\n"
	"\n";
1355
/*
 * Short option string for getopt_long(); a trailing ':' marks an option
 * that takes an argument. Every letter listed here has a matching entry in
 * l_opts[] below and a case in handle_args(). The stray "t:" that used to
 * be listed had neither, so it has been dropped.
 */
#define S_OPTS	"c:d:Fhi:I:M:NvVW"

/*
 * Long option table for getopt_long(). Each entry maps a long name onto the
 * short option letter that handle_args() switches on. The table ends with
 * an all-zero entry.
 */
static struct option l_opts[] = {
	{
		.name = "cpus",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'c'
	},
	{
		.name = "input-directory",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'd'
	},
	{
		.name = "find-records",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'F'
	},
	{
		.name = "help",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'h'
	},
	{
		.name = "input-base",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'i'
	},
	{
		.name = "iterations",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'I'
	},
	{
		.name = "map-devs",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'M'
	},
	{
		.name = "no-stalls",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'N'
	},
	{
		.name = "verbose",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'v'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'V'
	},
	{
		.name = "write-enable",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'W'
	},
	{
		/* Terminator required by getopt_long() */
		.name = NULL,
		.has_arg = 0,
		.flag = NULL,
		.val = 0
	}
};
1428
1429 /**
1430  * handle_args: Parse passed in argument list
1431  * @argc: Number of arguments in argv
1432  * @argv: Arguments passed in
1433  *
1434  * Does rudimentary parameter verification as well.
1435  */
1436 static void handle_args(int argc, char *argv[])
1437 {
1438         int c;
1439
1440         while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
1441                 switch (c) {
1442                 case 'c': 
1443                         cpus_to_use = atoi(optarg);
1444                         if (cpus_to_use <= 0 || cpus_to_use > ncpus) {
1445                                 fatal(NULL, ERR_ARGS, 
1446                                       "Invalid number of cpus %d (0<x<%d)\n",
1447                                       cpus_to_use, ncpus);
1448                                 /*NOTREACHED*/
1449                         }
1450                         break;
1451
1452                 case 'd':
1453                         idir = optarg;
1454                         if (access(idir, R_OK | X_OK) != 0) {
1455                                 fatal(idir, ERR_ARGS, 
1456                                       "Invalid input directory specified\n");
1457                                 /*NOTREACHED*/
1458                         }
1459                         break;
1460
1461                 case 'F': 
1462                         find_records = 1;
1463                         break;
1464
1465                 case 'h': 
1466                         usage(); 
1467                         exit(0);
1468                         /*NOTREACHED*/
1469
1470                 case 'i': 
1471                         ibase = optarg;
1472                         break;
1473
1474                 case 'I':
1475                         def_iterations = atoi(optarg);
1476                         if (def_iterations <= 0) {
1477                                 fprintf(stderr, 
1478                                         "Invalid number of iterations %d\n",
1479                                         def_iterations);
1480                                 exit(ERR_ARGS);
1481                                 /*NOTREACHED*/
1482                         }
1483                         break;
1484
1485                 case 'M':
1486                         read_map_devs(optarg);
1487                         break;
1488
1489                 case 'N':
1490                         no_stalls = 1;
1491                         break;
1492
1493                 case 'V':
1494                         fprintf(stderr, "btreplay -- version %s\n", 
1495                                 my_btversion);
1496                         fprintf(stderr, "            Built on %s\n", 
1497                                 build_date);
1498                         exit(0);
1499                         /*NOTREACHED*/
1500
1501                 case 'v':
1502                         verbose++;
1503                         break;
1504
1505                 case 'W':
1506                         write_enabled = 1;
1507                         break;
1508
1509                 default:
1510                         usage();
1511                         fatal(NULL, ERR_ARGS, 
1512                               "Invalid command line argument %c\n", c);
1513                         /*NOTREACHED*/
1514                 }
1515         }
1516
1517         while (optind < argc)
1518                 add_input_dev(argv[optind++]);
1519
1520         if (find_records)
1521                 find_input_devs(idir);
1522
1523         if (list_len(&input_devs) == 0) {
1524                 fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n");
1525                 /*NOTREACHED*/
1526         }
1527
1528         if (cpus_to_use < 0)
1529                 cpus_to_use = ncpus;
1530 }
1531
1532 /* 
1533  * ========================================================================
1534  * ==== MAIN ROUTINE ======================================================
1535  * ========================================================================
1536  */
1537
1538 /**
1539  * set_signal_done - Signal handler, catches signals & sets signal_done
1540  */
1541 static void set_signal_done(__attribute__((__unused__))int signum)
1542 {
1543         signal_done = 1;
1544 }
1545
1546 /**
1547  * main - 
1548  * @argc: Number of arguments
1549  * @argv: Array of arguments
1550  */
1551 int main(int argc, char *argv[])
1552 {
1553         int i;
1554         struct list_head *p;
1555
1556         pgsize = getpagesize();
1557         assert(pgsize > 0);
1558
1559         setup_signal(SIGINT, set_signal_done);
1560         setup_signal(SIGTERM, set_signal_done);
1561
1562         get_ncpus();
1563         handle_args(argc, argv);
1564         find_input_files();
1565
1566         nfiles = list_len(&input_files);
1567         __list_for_each(p, &input_files) {
1568                 tip_init(list_entry(p, struct thr_info, head));
1569         }
1570
1571         wait_replays_ready();
1572         for (i = 0; i < def_iterations; i++) {
1573                 rgenesis = gettime();
1574                 start_iter();
1575                 if (verbose)
1576                         fprintf(stderr, "I");
1577                 wait_iters_done();
1578         }
1579
1580         wait_replays_done();
1581         wait_reclaims_done();
1582
1583         if (verbose)
1584                 fprintf(stderr, "\n");
1585
1586         rem_input_files();
1587         release_map_devs();
1588
1589         return 0;
1590 }