Commit | Line | Data |
---|---|---|
d47a3fec AB |
1 | /* |
2 | * Blktrace replay utility - Play traces back | |
3 | * | |
4 | * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 | */ | |
20 | ||
/* Compile-time build stamp, formatted "Mmm dd yyyy at hh:mm:ss". */
static char build_date[] = __DATE__ " at " __TIME__;
22 | ||
23 | #include <assert.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <libaio.h> | |
27 | #include <pthread.h> | |
28 | #include <sched.h> | |
29 | #include <signal.h> | |
30 | #include <stdio.h> | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | #include <time.h> | |
34 | #include <unistd.h> | |
35 | #include <sys/param.h> | |
36 | #include <sys/stat.h> | |
37 | #include <sys/time.h> | |
38 | #include <sys/types.h> | |
39 | #include <dirent.h> | |
65a7043b | 40 | #include <stdarg.h> |
d47a3fec AB |
41 | |
42 | #if !defined(_GNU_SOURCE) | |
43 | # define _GNU_SOURCE | |
44 | #endif | |
45 | #include <getopt.h> | |
46 | ||
47 | #include "list.h" | |
48 | #include "btrecord.h" | |
49 | ||
50 | /* | |
51 | * ======================================================================== | |
52 | * ==== STRUCTURE DEFINITIONS ============================================= | |
53 | * ======================================================================== | |
54 | */ | |
55 | ||
56 | /** | |
57 | * Each device map has one of these: | |
58 | * | |
59 | * @head: Linked on to map_devs | |
60 | * @from_dev: Device name as seen on recorded system | |
61 | * @to_dev: Device name to be used on replay system | |
62 | */ | |
63 | struct map_dev { | |
64 | struct list_head head; | |
65 | char *from_dev, *to_dev; | |
66 | }; | |
67 | ||
68 | /** | |
69 | * Each device name specified has one of these (until threads are created) | |
70 | * | |
71 | * @head: Linked onto input_devs | |
72 | * @devnm: Device name -- 'sd*' | |
73 | */ | |
74 | struct dev_info { | |
75 | struct list_head head; | |
76 | char *devnm; | |
77 | }; | |
78 | ||
79 | /* | |
80 | * Per input file information | |
81 | * | |
82 | * @head: Used to link up on input_files | |
83 | * @free_iocbs: List of free iocb's available for use | |
84 | * @used_iocbs: List of iocb's currently outstanding | |
85 | * @mutex: Mutex used with condition variable to protect volatile values | |
86 | * @cond: Condition variable used when waiting on a volatile value change | |
87 | * @naios_out: Current number of AIOs outstanding on this context | |
88 | * @naios_free: Number of AIOs on the free list (short cut for list_len) | |
89 | * @send_wait: Boolean: When true, the sub thread is waiting on free IOCBs | |
90 | * @reap_wait: Boolean: When true, the rec thread is waiting on used IOCBs | |
91 | * @send_done: Boolean: When true, the sub thread has completed work | |
92 | * @reap_done: Boolean: When true, the rec thread has completed work | |
93 | * @sub_thread: Thread used to submit IOs. | |
94 | * @rec_thread: Thread used to reclaim IOs. | |
95 | * @ctx: IO context | |
96 | * @devnm: Copy of the device name being managed by this thread | |
97 | * @file_name: Full name of the input file | |
98 | * @cpu: CPU this thread is pinned to | |
99 | * @ifd: Input file descriptor | |
100 | * @ofd: Output file descriptor | |
101 | * @iterations: Remaining iterations to process | |
102 | * @vfp: For verbose dumping of actions performed | |
103 | */ | |
104 | struct thr_info { | |
105 | struct list_head head, free_iocbs, used_iocbs; | |
106 | pthread_mutex_t mutex; | |
107 | pthread_cond_t cond; | |
108 | volatile long naios_out, naios_free; | |
109 | volatile int send_wait, reap_wait, send_done, reap_done; | |
110 | pthread_t sub_thread, rec_thread; | |
111 | io_context_t ctx; | |
112 | char *devnm, *file_name; | |
113 | int cpu, ifd, ofd, iterations; | |
114 | FILE *vfp; | |
115 | }; | |
116 | ||
117 | /* | |
118 | * Every Asynchronous IO used has one of these (naios per file/device). | |
119 | * | |
120 | * @iocb: IOCB sent down via io_submit | |
121 | * @head: Linked onto file_list.free_iocbs or file_list.used_iocbs | |
122 | * @tip: Pointer to per-thread information this IO is associated with | |
123 | * @nbytes: Number of bytes in buffer associated with iocb | |
124 | */ | |
125 | struct iocb_pkt { | |
126 | struct iocb iocb; | |
127 | struct list_head head; | |
128 | struct thr_info *tip; | |
129 | int nbytes; | |
130 | }; | |
131 | ||
132 | /* | |
133 | * ======================================================================== | |
134 | * ==== GLOBAL VARIABLES ================================================== | |
135 | * ======================================================================== | |
136 | */ | |
137 | ||
138 | static volatile int signal_done = 0; // Boolean: Signal'ed, need to quit | |
139 | ||
140 | static char *ibase = "replay"; // Input base name | |
141 | static char *idir = "."; // Input directory base | |
142 | static int cpus_to_use = -1; // Number of CPUs to use | |
143 | static int def_iterations = 1; // Default number of iterations | |
144 | static int naios = 512; // Number of AIOs per thread | |
145 | static int ncpus = 0; // Number of CPUs in the system | |
146 | static int verbose = 0; // Boolean: Output some extra info | |
147 | static int write_enabled = 0; // Boolean: Enable writing | |
148 | static __u64 genesis = ~0; // Earliest time seen | |
149 | static __u64 rgenesis; // Our start time | |
150 | static size_t pgsize; // System Page size | |
151 | static int nb_sec = 512; // Number of bytes per sector | |
152 | static LIST_HEAD(input_devs); // List of devices to handle | |
153 | static LIST_HEAD(input_files); // List of input files to handle | |
154 | static LIST_HEAD(map_devs); // List of device maps | |
155 | static int nfiles = 0; // Number of files to handle | |
156 | static int no_stalls = 0; // Boolean: Disable pre-stalls | |
157 | static int find_records = 0; // Boolean: Find record files auto | |
158 | ||
/*
 * Counters guarded by a mutex / condition variable pair each.
 *
 * n_reclaims_done:	Number of reclaim threads that have completed.
 * n_replays_done:	Number of replay threads that have completed.
 * n_replays_ready:	Number of replay threads ready to start.
 * n_iters_done:	Number of replay threads done with one iteration.
 * iter_start:		Starts an iteration for the replay threads.
 */

/* Reclaim threads finished */
static volatile int n_reclaims_done = 0;
static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER;

/* Replay threads finished */
static volatile int n_replays_done = 0;
static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER;

/* Replay threads ready to go */
static volatile int n_replays_ready = 0;
static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER;

/* Replay threads done with the current iteration */
static volatile int n_iters_done = 0;
static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER;

/* Iteration start tokens */
static volatile int iter_start = 0;
static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER;
187 | ||
188 | /* | |
189 | * ======================================================================== | |
190 | * ==== FORWARD REFERENCES ================================================ | |
191 | * ======================================================================== | |
192 | */ | |
193 | ||
/* Thread entry points handed to pthread_create() in tip_init() */
static void *replay_sub(void *arg);
static void *replay_rec(void *arg);

/* Usage text printed by usage(); defined later in this file */
static char usage_str[];
197 | ||
198 | /* | |
199 | * ======================================================================== | |
200 | * ==== INLINE ROUTINES =================================================== | |
201 | * ======================================================================== | |
202 | */ | |
203 | ||
/* Exit values passed to fatal() */
#define ERR_ARGS	1
#define ERR_SYSCALL	2

/**
 * fatal - Report an unrecoverable error and terminate the program
 * @errstring:	If non-NULL, handed to perror() first
 * @exitval:	Process exit status
 * @fmt:	printf-style format (plus arguments) written to stderr
 *
 * Never returns.
 */
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
d47a3fec AB |
226 | |
/**
 * du64_to_sec - Whole-seconds part of a nanosecond count
 * @du64:	Time value in nanoseconds
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const long long unsigned ns_per_sec = 1000 * 1000 * 1000;
	const long long unsigned ns = (long long unsigned)du64;

	return ns / ns_per_sec;
}
231 | ||
/**
 * du64_to_nsec - Sub-second (nanosecond) remainder of a nanosecond count
 * @du64:	Time value in nanoseconds
 *
 * The value is reinterpreted as signed and its magnitude is used, so a
 * "negative" delta still yields a positive remainder.
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return magnitude % (1000 * 1000 * 1000);
}
236 | ||
/**
 * min - Return the smaller of two integers
 */
static inline int min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
244 | ||
/**
 * minl - Return the smaller of two longs
 */
static inline long minl(long a, long b)
{
	if (a < b)
		return a;
	return b;
}
252 | ||
253 | /** | |
254 | * usage - Display usage string and version | |
255 | */ | |
256 | static inline void usage(void) | |
257 | { | |
258 | fprintf(stderr, "Usage: btreplay -- version %s\n%s", | |
259 | my_btversion, usage_str); | |
260 | } | |
261 | ||
262 | /** | |
263 | * is_send_done - Returns true if sender should quit early | |
264 | * @tip: Per-thread information | |
265 | */ | |
266 | static inline int is_send_done(struct thr_info *tip) | |
267 | { | |
268 | return signal_done || tip->send_done; | |
269 | } | |
270 | ||
271 | /** | |
272 | * is_reap_done - Returns true if reaper should quit early | |
273 | * @tip: Per-thread information | |
274 | */ | |
275 | static inline int is_reap_done(struct thr_info *tip) | |
276 | { | |
277 | return tip->send_done && tip->naios_out == 0; | |
278 | } | |
279 | ||
/* Nanoseconds per second */
#define NS_TICKS	((__u64)1000 * (__u64)1000 * (__u64)1000)

/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts:	Time as seconds + nanoseconds
 */
static inline __u64 ts2ns(struct timespec *ts)
{
	return ((__u64)(ts->tv_sec) * NS_TICKS) + (__u64)(ts->tv_nsec);
}

/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp:	Time as seconds + microseconds
 *
 * Both fields are scaled to nanoseconds before being summed: seconds
 * by NS_TICKS, microseconds by 1000.
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	return ((__u64)(tp->tv_sec) * NS_TICKS) +
	       ((__u64)(tp->tv_usec) * (__u64)1000);
}
296 | ||
297 | /** | |
298 | * touch_memory - Force physical memory to be allocating it | |
299 | * | |
300 | * For malloc()ed memory we need to /touch/ it to make it really | |
301 | * exist. Otherwise, for write's (to storage) things may not work | |
302 | * as planned - we see Linux just use a single area to /read/ from | |
303 | * (as there isn't any memory that has been associated with the | |
304 | * allocated virtual addresses yet). | |
305 | */ | |
306 | static inline void touch_memory(char *buf, size_t bsize) | |
307 | { | |
308 | #if defined(PREP_BUFS) | |
309 | memset(buf, 0, bsize); | |
310 | #else | |
311 | size_t i; | |
312 | ||
313 | for (i = 0; i < bsize; i += pgsize) | |
314 | buf[i] = 0; | |
315 | #endif | |
316 | } | |
317 | ||
318 | /** | |
319 | * buf_alloc - Returns a page-aligned buffer of the specified size | |
320 | * @nbytes: Number of bytes to allocate | |
321 | */ | |
322 | static inline void *buf_alloc(size_t nbytes) | |
323 | { | |
324 | void *buf; | |
325 | ||
326 | if (posix_memalign(&buf, pgsize, nbytes)) { | |
327 | fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n"); | |
328 | /*NOTREACHED*/ | |
329 | } | |
330 | ||
331 | return buf; | |
332 | } | |
333 | ||
334 | /** | |
335 | * gettime - Returns current time | |
336 | */ | |
337 | static inline __u64 gettime(void) | |
338 | { | |
339 | static int use_clock_gettime = -1; // Which clock to use | |
340 | ||
341 | if (use_clock_gettime < 0) { | |
342 | use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0; | |
343 | if (use_clock_gettime) { | |
344 | struct timespec ts = { | |
345 | .tv_sec = 0, | |
346 | .tv_nsec = 0 | |
347 | }; | |
348 | clock_settime(CLOCK_MONOTONIC, &ts); | |
349 | } | |
350 | } | |
351 | ||
352 | if (use_clock_gettime) { | |
353 | struct timespec ts; | |
354 | clock_gettime(CLOCK_MONOTONIC, &ts); | |
355 | return ts2ns(&ts); | |
356 | } | |
357 | else { | |
358 | struct timeval tp; | |
359 | gettimeofday(&tp, NULL); | |
360 | return tv2ns(&tp); | |
361 | } | |
362 | } | |
363 | ||
364 | /** | |
365 | * setup_signal - Set up a signal handler for the specified signum | |
366 | */ | |
367 | static inline void setup_signal(int signum, sighandler_t handler) | |
368 | { | |
369 | if (signal(signum, handler) == SIG_ERR) { | |
370 | fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n", | |
371 | signum); | |
372 | /*NOTREACHED*/ | |
373 | } | |
374 | } | |
375 | ||
376 | /* | |
377 | * ======================================================================== | |
378 | * ==== CONDITION VARIABLE ROUTINES ======================================= | |
379 | * ======================================================================== | |
380 | */ | |
381 | ||
/**
 * __set_cv - Bump a counter that is guarded by a mutex/condvar pair
 * @pmp:	Mutex protecting the counter
 * @pcp:	Condition variable signalled after the update
 * @vp:		Counter to increment
 * @mxv:	Upper bound for the counter (only checked when asserts are on)
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);

	assert(*vp < mxv);
	(*vp)++;

	pthread_cond_signal(pcp);
	pthread_mutex_unlock(pmp);
}
399 | ||
/**
 * __wait_cv - Sleep until a guarded counter reaches a target, then reset it
 * @pmp:	Mutex protecting the counter
 * @pcp:	Condition variable to wait on
 * @vp:		Counter being watched
 * @mxv:	Value to wait for
 *
 * The counter is set back to zero before the mutex is dropped.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);

	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;

	pthread_mutex_unlock(pmp);
}
416 | ||
417 | static inline void set_reclaim_done(void) | |
418 | { | |
419 | __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
420 | nfiles); | |
421 | } | |
422 | ||
423 | static inline void wait_reclaims_done(void) | |
424 | { | |
425 | __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
426 | nfiles); | |
427 | } | |
428 | ||
429 | static inline void set_replay_ready(void) | |
430 | { | |
431 | __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
432 | nfiles); | |
433 | } | |
434 | ||
435 | static inline void wait_replays_ready(void) | |
436 | { | |
437 | __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
438 | nfiles); | |
439 | } | |
440 | ||
441 | static inline void set_replay_done(void) | |
442 | { | |
443 | __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
444 | nfiles); | |
445 | } | |
446 | ||
447 | static inline void wait_replays_done(void) | |
448 | { | |
449 | __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
450 | nfiles); | |
451 | } | |
452 | ||
453 | static inline void set_iter_done(void) | |
454 | { | |
455 | __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
456 | nfiles); | |
457 | } | |
458 | ||
459 | static inline void wait_iters_done(void) | |
460 | { | |
461 | __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
462 | nfiles); | |
463 | } | |
464 | ||
465 | /** | |
466 | * wait_iter_start - Wait for an iteration to start | |
467 | * | |
468 | * This is /slightly/ different: we are waiting for a value to become | |
469 | * non-zero, and then we decrement it and go on. | |
470 | */ | |
471 | static inline void wait_iter_start(void) | |
472 | { | |
473 | pthread_mutex_lock(&iter_start_mutex); | |
474 | while (iter_start == 0) | |
475 | pthread_cond_wait(&iter_start_cond, &iter_start_mutex); | |
476 | assert(1 <= iter_start && iter_start <= nfiles); | |
477 | iter_start--; | |
478 | pthread_mutex_unlock(&iter_start_mutex); | |
479 | } | |
480 | ||
481 | /** | |
482 | * start_iter - Start an iteration at the replay thread level | |
483 | */ | |
484 | static inline void start_iter(void) | |
485 | { | |
486 | pthread_mutex_lock(&iter_start_mutex); | |
487 | assert(iter_start == 0); | |
488 | iter_start = nfiles; | |
489 | pthread_cond_broadcast(&iter_start_cond); | |
490 | pthread_mutex_unlock(&iter_start_mutex); | |
491 | } | |
492 | ||
493 | /* | |
494 | * ======================================================================== | |
495 | * ==== CPU RELATED ROUTINES ============================================== | |
496 | * ======================================================================== | |
497 | */ | |
498 | ||
499 | /** | |
500 | * get_ncpus - Sets up the global 'ncpus' value | |
501 | */ | |
502 | static void get_ncpus(void) | |
503 | { | |
504 | cpu_set_t cpus; | |
505 | ||
506 | if (sched_getaffinity(getpid(), sizeof(cpus), &cpus)) { | |
507 | fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n"); | |
508 | /*NOTREACHED*/ | |
509 | } | |
510 | ||
511 | /* | |
512 | * XXX This assumes (perhaps wrongly) that there are no /holes/ | |
513 | * XXX in the mask. | |
514 | */ | |
515 | for (ncpus = 0; ncpus < CPU_SETSIZE && CPU_ISSET(ncpus, &cpus); ncpus++) | |
516 | ; | |
517 | if (ncpus == 0) { | |
518 | fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n"); | |
519 | /*NOTREACHED*/ | |
520 | } | |
521 | } | |
522 | ||
523 | /** | |
524 | * pin_to_cpu - Pin this thread to a specific CPU | |
525 | * @tip: Thread information | |
526 | */ | |
527 | static void pin_to_cpu(struct thr_info *tip) | |
528 | { | |
529 | cpu_set_t cpus; | |
530 | ||
531 | assert(0 <= tip->cpu && tip->cpu < ncpus); | |
532 | ||
533 | CPU_ZERO(&cpus); | |
534 | CPU_SET(tip->cpu, &cpus); | |
535 | if (sched_setaffinity(getpid(), sizeof(cpus), &cpus)) { | |
536 | fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n"); | |
537 | /*NOTREACHED*/ | |
538 | } | |
539 | ||
540 | if (verbose > 1) { | |
541 | int i; | |
542 | cpu_set_t now; | |
543 | ||
544 | (void)sched_getaffinity(getpid(), sizeof(now), &now); | |
545 | fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu); | |
546 | for (i = 0; i < ncpus; i++) | |
547 | fprintf(tip->vfp, "%1d", CPU_ISSET(i, &now)); | |
548 | fprintf(tip->vfp, "\n"); | |
549 | } | |
550 | } | |
551 | ||
552 | /* | |
553 | * ======================================================================== | |
554 | * ==== INPUT DEVICE HANDLERS ============================================= | |
555 | * ======================================================================== | |
556 | */ | |
557 | ||
558 | /** | |
559 | * add_input_dev - Add a device ('sd*') to the list of devices to handle | |
560 | */ | |
561 | static void add_input_dev(char *devnm) | |
562 | { | |
563 | struct list_head *p; | |
564 | struct dev_info *dip; | |
565 | ||
566 | __list_for_each(p, &input_devs) { | |
567 | dip = list_entry(p, struct dev_info, head); | |
568 | if (strcmp(dip->devnm, devnm) == 0) | |
569 | return; | |
570 | } | |
571 | ||
572 | dip = malloc(sizeof(*dip)); | |
573 | dip->devnm = strdup(devnm); | |
574 | list_add_tail(&dip->head, &input_devs); | |
575 | } | |
576 | ||
577 | /** | |
578 | * rem_input_dev - Remove resources associated with this device | |
579 | */ | |
580 | static void rem_input_dev(struct dev_info *dip) | |
581 | { | |
582 | list_del(&dip->head); | |
583 | free(dip->devnm); | |
584 | free(dip); | |
585 | } | |
586 | ||
587 | static void find_input_devs(char *idir) | |
588 | { | |
589 | struct dirent *ent; | |
590 | DIR *dir = opendir(idir); | |
591 | ||
592 | if (dir == NULL) { | |
593 | fatal(idir, ERR_ARGS, "Unable to open %s\n", idir); | |
594 | /*NOTREACHED*/ | |
595 | } | |
596 | ||
597 | while ((ent = readdir(dir)) != NULL) { | |
598 | char *p, *dsf = malloc(256); | |
599 | ||
600 | if (strstr(ent->d_name, ".replay.") == NULL) | |
601 | continue; | |
602 | ||
603 | dsf = strdup(ent->d_name); | |
604 | p = index(dsf, '.'); | |
605 | assert(p != NULL); | |
606 | *p = '\0'; | |
607 | add_input_dev(dsf); | |
608 | free(dsf); | |
609 | } | |
610 | ||
611 | closedir(dir); | |
612 | } | |
613 | ||
614 | /* | |
615 | * ======================================================================== | |
616 | * ==== MAP DEVICE INTERFACES ============================================= | |
617 | * ======================================================================== | |
618 | */ | |
619 | ||
620 | /** | |
621 | * read_map_devs - Read in a set of device mapping from the provided file. | |
622 | * @file_name: File containing device maps | |
623 | * | |
624 | * We support the notion of multiple such files being specifed on the cmd line | |
625 | */ | |
626 | static void read_map_devs(char *file_name) | |
627 | { | |
628 | FILE *fp; | |
629 | char *from_dev, *to_dev; | |
630 | ||
631 | fp = fopen(file_name, "r"); | |
632 | if (!fp) { | |
633 | fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n"); | |
634 | /*NOTREACHED*/ | |
635 | } | |
636 | ||
637 | while (fscanf(fp, "%as %as", &from_dev, &to_dev) == 2) { | |
638 | struct map_dev *mdp = malloc(sizeof(*mdp)); | |
639 | ||
640 | mdp->from_dev = from_dev; | |
641 | mdp->to_dev = to_dev; | |
642 | list_add_tail(&mdp->head, &map_devs); | |
643 | } | |
644 | ||
645 | fclose(fp); | |
646 | } | |
647 | ||
648 | /** | |
649 | * release_map_devs - Release resources associated with device mappings. | |
650 | */ | |
651 | static void release_map_devs(void) | |
652 | { | |
653 | struct list_head *p, *q; | |
654 | ||
655 | list_for_each_safe(p, q, &map_devs) { | |
656 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
657 | ||
658 | list_del(&mdp->head); | |
659 | ||
660 | free(mdp->from_dev); | |
661 | free(mdp->to_dev); | |
662 | free(mdp); | |
663 | } | |
664 | } | |
665 | ||
666 | /** | |
667 | * map_dev - Return the mapped device for that specified | |
668 | * @from_dev: Device name as seen on recorded system | |
669 | * | |
670 | * Note: If there is no such mapping, we return the same name. | |
671 | */ | |
672 | static char *map_dev(char *from_dev) | |
673 | { | |
674 | struct list_head *p; | |
675 | ||
676 | __list_for_each(p, &map_devs) { | |
677 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
678 | ||
679 | if (strcmp(from_dev, mdp->from_dev) == 0) | |
680 | return mdp->to_dev; | |
681 | } | |
682 | ||
683 | return from_dev; | |
684 | } | |
685 | ||
686 | /* | |
687 | * ======================================================================== | |
688 | * ==== IOCB MANAGEMENT ROUTINES ========================================== | |
689 | * ======================================================================== | |
690 | */ | |
691 | ||
692 | /** | |
693 | * iocb_init - Initialize the fields of an IOCB | |
694 | * @tip: Per-thread information | |
695 | * iocbp: IOCB pointer to update | |
696 | */ | |
697 | static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp) | |
698 | { | |
699 | iocbp->tip = tip; | |
700 | iocbp->nbytes = 0; | |
701 | iocbp->iocb.u.c.buf = NULL; | |
702 | } | |
703 | ||
704 | /** | |
705 | * iocb_setup - Set up an iocb with this AIOs information | |
706 | * @iocbp: IOCB pointer to update | |
707 | * @rw: Direction (0 == write, 1 == read) | |
708 | * @n: Number of bytes to transfer | |
709 | * @off: Offset (in bytes) | |
710 | */ | |
711 | static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off) | |
712 | { | |
713 | char *buf; | |
714 | struct iocb *iop = &iocbp->iocb; | |
715 | ||
716 | assert(rw == 0 || rw == 1); | |
717 | assert(0 < n && (n % nb_sec) == 0); | |
718 | assert(0 <= off); | |
719 | ||
720 | if (iocbp->nbytes) { | |
721 | if (iocbp->nbytes >= n) { | |
722 | buf = iop->u.c.buf; | |
723 | goto prep; | |
724 | } | |
725 | ||
726 | assert(iop->u.c.buf); | |
727 | free(iop->u.c.buf); | |
728 | } | |
729 | ||
730 | buf = buf_alloc(n); | |
731 | iocbp->nbytes = n; | |
732 | ||
733 | prep: | |
734 | if (rw) | |
735 | io_prep_pread(iop, iocbp->tip->ofd, buf, n, off); | |
736 | else { | |
737 | assert(write_enabled); | |
738 | io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off); | |
739 | touch_memory(buf, n); | |
740 | } | |
741 | ||
742 | iop->data = iocbp; | |
743 | } | |
744 | ||
745 | /* | |
746 | * ======================================================================== | |
747 | * ==== PER-THREAD SET UP & TEAR DOWN ===================================== | |
748 | * ======================================================================== | |
749 | */ | |
750 | ||
751 | /** | |
752 | * tip_init - Per thread initialization function | |
753 | */ | |
754 | static void tip_init(struct thr_info *tip) | |
755 | { | |
756 | int i; | |
757 | ||
758 | INIT_LIST_HEAD(&tip->free_iocbs); | |
759 | INIT_LIST_HEAD(&tip->used_iocbs); | |
760 | ||
761 | pthread_mutex_init(&tip->mutex, NULL); | |
762 | pthread_cond_init(&tip->cond, NULL); | |
763 | ||
764 | if (io_setup(naios, &tip->ctx)) { | |
765 | fatal("io_setup", ERR_SYSCALL, "io_setup failed\n"); | |
766 | /*NOTREACHED*/ | |
767 | } | |
768 | ||
769 | tip->ofd = -1; | |
770 | tip->naios_out = 0; | |
771 | tip->send_done = tip->reap_done = 0; | |
772 | tip->send_wait = tip->reap_wait = 0; | |
773 | ||
774 | memset(&tip->sub_thread, 0, sizeof(tip->sub_thread)); | |
775 | memset(&tip->rec_thread, 0, sizeof(tip->rec_thread)); | |
776 | ||
777 | for (i = 0; i < naios; i++) { | |
778 | struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp)); | |
779 | ||
780 | iocb_init(tip, iocbp); | |
781 | list_add_tail(&iocbp->head, &tip->free_iocbs); | |
782 | } | |
783 | tip->naios_free = naios; | |
784 | ||
785 | if (verbose > 1) { | |
786 | char fn[MAXPATHLEN]; | |
787 | ||
788 | sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase, | |
789 | tip->cpu); | |
790 | tip->vfp = fopen(fn, "w"); | |
791 | if (!tip->vfp) { | |
792 | fatal(fn, ERR_SYSCALL, "Failed to open report\n"); | |
793 | /*NOTREACHED*/ | |
794 | } | |
795 | ||
796 | setlinebuf(tip->vfp); | |
797 | } | |
798 | ||
799 | if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) { | |
800 | fatal("pthread_create", ERR_SYSCALL, | |
801 | "thread create failed\n"); | |
802 | /*NOTREACHED*/ | |
803 | } | |
804 | ||
805 | if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) { | |
806 | fatal("pthread_create", ERR_SYSCALL, | |
807 | "thread create failed\n"); | |
808 | /*NOTREACHED*/ | |
809 | } | |
810 | } | |
811 | ||
812 | /** | |
813 | * tip_release - Release resources associated with this thread | |
814 | */ | |
815 | static void tip_release(struct thr_info *tip) | |
816 | { | |
817 | struct list_head *p, *q; | |
818 | ||
819 | assert(tip->send_done); | |
820 | assert(tip->reap_done); | |
821 | assert(list_len(&tip->used_iocbs) == 0); | |
822 | assert(tip->naios_free == naios); | |
823 | ||
824 | if (pthread_join(tip->sub_thread, NULL)) { | |
825 | fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n"); | |
826 | /*NOTREACHED*/ | |
827 | } | |
828 | if (pthread_join(tip->rec_thread, NULL)) { | |
829 | fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n"); | |
830 | /*NOTREACHED*/ | |
831 | } | |
832 | ||
833 | io_destroy(tip->ctx); | |
834 | ||
835 | list_splice(&tip->used_iocbs, &tip->free_iocbs); | |
836 | list_for_each_safe(p, q, &tip->free_iocbs) { | |
837 | struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head); | |
838 | ||
839 | list_del(&iocbp->head); | |
840 | if (iocbp->nbytes) | |
841 | free(iocbp->iocb.u.c.buf); | |
842 | free(iocbp); | |
843 | } | |
844 | ||
845 | pthread_cond_destroy(&tip->cond); | |
846 | pthread_mutex_destroy(&tip->mutex); | |
847 | } | |
848 | ||
849 | /** | |
850 | * add_input_file - Allocate and initialize per-input file structure | |
851 | * @cpu: CPU for this file | |
852 | * @devnm: Device name for this file | |
853 | * @file_name: Fully qualifed input file name | |
854 | */ | |
855 | static void add_input_file(int cpu, char *devnm, char *file_name) | |
856 | { | |
857 | struct stat buf; | |
858 | struct io_file_hdr hdr; | |
859 | struct thr_info *tip = buf_alloc(sizeof(*tip)); | |
860 | __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub); | |
861 | ||
862 | assert(0 <= cpu && cpu < ncpus); | |
863 | ||
864 | memset(&hdr, 0, sizeof(hdr)); | |
865 | memset(tip, 0, sizeof(*tip)); | |
866 | tip->cpu = cpu % cpus_to_use; | |
867 | tip->iterations = def_iterations; | |
868 | ||
869 | tip->ifd = open(file_name, O_RDONLY); | |
870 | if (tip->ifd < 0) { | |
871 | fatal(file_name, ERR_ARGS, "Unable to open\n"); | |
872 | /*NOTREACHED*/ | |
873 | } | |
874 | if (fstat(tip->ifd, &buf) < 0) { | |
875 | fatal(file_name, ERR_SYSCALL, "fstat failed\n"); | |
876 | /*NOTREACHED*/ | |
877 | } | |
878 | if (buf.st_size < (off_t)sizeof(hdr)) { | |
879 | if (verbose) | |
880 | fprintf(stderr, "\t%s empty\n", file_name); | |
881 | goto empty_file; | |
882 | } | |
883 | ||
884 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
885 | fatal(file_name, ERR_ARGS, "Header read failed\n"); | |
886 | /*NOTREACHED*/ | |
887 | } | |
888 | ||
889 | if (hdr.version != my_version) { | |
890 | fprintf(stderr, "%llx %llx %llx %llx\n", | |
891 | (long long unsigned)hdr.version, | |
892 | (long long unsigned)hdr.genesis, | |
893 | (long long unsigned)hdr.nbunches, | |
894 | (long long unsigned)hdr.total_pkts); | |
895 | fatal(NULL, ERR_ARGS, | |
896 | "BT version mismatch: %lx versus my %lx\n", | |
897 | (long)hdr.version, (long)my_version); | |
898 | ||
899 | } | |
900 | ||
901 | if (hdr.nbunches == 0) { | |
902 | empty_file: | |
903 | close(tip->ifd); | |
904 | free(tip); | |
905 | return; | |
906 | } | |
907 | ||
908 | if (hdr.genesis < genesis) { | |
909 | if (verbose > 1) | |
910 | fprintf(stderr, "Setting genesis to %llu.%llu\n", | |
911 | du64_to_sec(hdr.genesis), | |
912 | du64_to_nsec(hdr.genesis)); | |
913 | genesis = hdr.genesis; | |
914 | } | |
915 | ||
916 | tip->devnm = strdup(devnm); | |
917 | tip->file_name = strdup(file_name); | |
918 | ||
919 | list_add_tail(&tip->head, &input_files); | |
920 | ||
921 | if (verbose) | |
922 | fprintf(stderr, "Added %s %llu\n", file_name, | |
923 | (long long)hdr.genesis); | |
924 | } | |
925 | ||
926 | /** | |
927 | * rem_input_file - Release resources associated with an input file | |
928 | * @tip: Per-input file information | |
929 | */ | |
930 | static void rem_input_file(struct thr_info *tip) | |
931 | { | |
932 | list_del(&tip->head); | |
933 | ||
934 | tip_release(tip); | |
935 | ||
936 | close(tip->ofd); | |
937 | close(tip->ifd); | |
938 | free(tip->file_name); | |
939 | free(tip->devnm); | |
940 | free(tip); | |
941 | } | |
942 | ||
943 | /** | |
944 | * rem_input_files - Remove all input files | |
945 | */ | |
946 | static void rem_input_files(void) | |
947 | { | |
948 | struct list_head *p, *q; | |
949 | ||
950 | list_for_each_safe(p, q, &input_files) { | |
951 | rem_input_file(list_entry(p, struct thr_info, head)); | |
952 | } | |
953 | } | |
954 | ||
955 | /** | |
956 | * __find_input_files - Find input files associated with this device (per cpu) | |
957 | */ | |
958 | static void __find_input_files(struct dev_info *dip) | |
959 | { | |
960 | int cpu = 0; | |
961 | ||
962 | for (;;) { | |
963 | char full_name[MAXPATHLEN]; | |
964 | ||
965 | sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu); | |
966 | if (access(full_name, R_OK) != 0) | |
967 | break; | |
968 | ||
969 | add_input_file(cpu, dip->devnm, full_name); | |
970 | cpu++; | |
971 | } | |
972 | ||
973 | if (!cpu) { | |
974 | fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm); | |
975 | /*NOTREACHED*/ | |
976 | } | |
977 | ||
978 | rem_input_dev(dip); | |
979 | } | |
980 | ||
981 | ||
982 | /** | |
983 | * find_input_files - Find input files for all devices | |
984 | */ | |
985 | static void find_input_files(void) | |
986 | { | |
987 | struct list_head *p, *q; | |
988 | ||
989 | list_for_each_safe(p, q, &input_devs) { | |
990 | __find_input_files(list_entry(p, struct dev_info, head)); | |
991 | } | |
992 | } | |
993 | ||
994 | /* | |
995 | * ======================================================================== | |
996 | * ==== RECLAIM ROUTINES ================================================== | |
997 | * ======================================================================== | |
998 | */ | |
999 | ||
1000 | /** | |
1001 | * reap_wait_aios - Wait for and return number of outstanding AIOs | |
1002 | * | |
1003 | * Will return 0 if we are done | |
1004 | */ | |
1005 | static int reap_wait_aios(struct thr_info *tip) | |
1006 | { | |
1007 | int naios = 0; | |
1008 | ||
1009 | if (!is_reap_done(tip)) { | |
1010 | pthread_mutex_lock(&tip->mutex); | |
1011 | while (tip->naios_out == 0) { | |
1012 | tip->reap_wait = 1; | |
1013 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1014 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1015 | "nfree_current cond wait failed\n"); | |
1016 | /*NOTREACHED*/ | |
1017 | } | |
1018 | } | |
1019 | naios = tip->naios_out; | |
1020 | pthread_mutex_unlock(&tip->mutex); | |
1021 | } | |
1022 | assert(is_reap_done(tip) || naios > 0); | |
1023 | ||
1024 | return is_reap_done(tip) ? 0 : naios; | |
1025 | } | |
1026 | ||
1027 | /** | |
1028 | * reclaim_ios - Reclaim AIOs completed, recycle IOCBs | |
1029 | * @tip: Per-thread information | |
1030 | * @naios_out: Number of AIOs we have outstanding (min) | |
1031 | */ | |
1032 | static void reclaim_ios(struct thr_info *tip, long naios_out) | |
1033 | { | |
1034 | long i, ndone; | |
1035 | struct io_event *evp, events[naios_out]; | |
1036 | ||
1037 | again: | |
1038 | assert(naios > 0); | |
1039 | for (;;) { | |
1040 | ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL); | |
1041 | if (ndone > 0) | |
1042 | break; | |
1043 | ||
1044 | if (errno && errno != EINTR) { | |
1045 | fatal("io_getevents", ERR_SYSCALL, | |
1046 | "io_getevents failed\n"); | |
1047 | /*NOTREACHED*/ | |
1048 | } | |
1049 | } | |
1050 | assert(0 < ndone && ndone <= naios_out); | |
1051 | ||
1052 | pthread_mutex_lock(&tip->mutex); | |
1053 | for (i = 0, evp = events; i < ndone; i++, evp++) { | |
1054 | struct iocb_pkt *iocbp = evp->data; | |
1055 | ||
1056 | if (evp->res != iocbp->iocb.u.c.nbytes) { | |
1057 | fatal(NULL, ERR_SYSCALL, | |
1058 | "Event failure %ld/%ld\t(%ld + %ld)\n", | |
1059 | (long)evp->res, (long)evp->res2, | |
1060 | (long)iocbp->iocb.u.c.offset / nb_sec, | |
1061 | (long)iocbp->iocb.u.c.nbytes / nb_sec); | |
1062 | /*NOTREACHED*/ | |
1063 | } | |
1064 | ||
1065 | list_move_tail(&iocbp->head, &tip->free_iocbs); | |
1066 | } | |
1067 | ||
1068 | tip->naios_free += ndone; | |
1069 | tip->naios_out -= ndone; | |
1070 | naios_out = minl(naios_out, tip->naios_out); | |
1071 | ||
1072 | if (tip->send_wait) { | |
1073 | tip->send_wait = 0; | |
1074 | pthread_cond_signal(&tip->cond); | |
1075 | } | |
1076 | pthread_mutex_unlock(&tip->mutex); | |
1077 | ||
1078 | /* | |
1079 | * Short cut: If we /know/ there are some more AIOs, go handle them | |
1080 | */ | |
1081 | if (naios_out) | |
1082 | goto again; | |
1083 | } | |
1084 | ||
1085 | /** | |
1086 | * replay_rec - Worker thread to reclaim AIOs | |
1087 | * @arg: Pointer to thread information | |
1088 | */ | |
1089 | static void *replay_rec(void *arg) | |
1090 | { | |
1091 | long naios_out; | |
1092 | struct thr_info *tip = arg; | |
1093 | ||
1094 | while ((naios_out = reap_wait_aios(tip)) > 0) | |
1095 | reclaim_ios(tip, naios_out); | |
1096 | ||
1097 | assert(tip->send_done); | |
1098 | tip->reap_done = 1; | |
1099 | set_reclaim_done(); | |
1100 | ||
1101 | return NULL; | |
1102 | } | |
1103 | ||
1104 | /* | |
1105 | * ======================================================================== | |
1106 | * ==== REPLAY ROUTINES =================================================== | |
1107 | * ======================================================================== | |
1108 | */ | |
1109 | ||
1110 | /** | |
1111 | * next_bunch - Retrieve next bunch of AIOs to process | |
1112 | * @tip: Per-thread information | |
1113 | * @bunch: Bunch information | |
1114 | * | |
1115 | * Returns TRUE if we recovered a bunch of IOs, else hit EOF | |
1116 | */ | |
1117 | static int next_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1118 | { | |
1119 | size_t count, result; | |
1120 | ||
1121 | result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr)); | |
1122 | if (result != sizeof(bunch->hdr)) { | |
1123 | if (result == 0) | |
1124 | return 0; | |
1125 | ||
1126 | fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n", | |
1127 | (long)result); | |
1128 | /*NOTREACHED*/ | |
1129 | } | |
1130 | assert(bunch->hdr.npkts <= BT_MAX_PKTS); | |
1131 | ||
1132 | count = bunch->hdr.npkts * sizeof(struct io_pkt); | |
1133 | result = read(tip->ifd, &bunch->pkts, count); | |
1134 | if (result != count) { | |
1135 | fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n", | |
1136 | (long)result, (long)count); | |
1137 | /*NOTREACHED*/ | |
1138 | } | |
1139 | ||
1140 | return 1; | |
1141 | } | |
1142 | ||
1143 | /** | |
1144 | * nfree_current - Returns current number of AIOs that are free | |
1145 | * | |
1146 | * Will wait for available ones... | |
1147 | * | |
1148 | * Returns 0 if we have some condition that causes us to exit | |
1149 | */ | |
1150 | static int nfree_current(struct thr_info *tip) | |
1151 | { | |
1152 | int nfree = 0; | |
1153 | ||
1154 | pthread_mutex_lock(&tip->mutex); | |
1155 | while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) { | |
1156 | tip->send_wait = 1; | |
1157 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1158 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1159 | "nfree_current cond wait failed\n"); | |
1160 | /*NOTREACHED*/ | |
1161 | } | |
1162 | } | |
1163 | pthread_mutex_unlock(&tip->mutex); | |
1164 | ||
1165 | return nfree; | |
1166 | } | |
1167 | ||
1168 | /** | |
1169 | * stall - Stall for the number of nanoseconds requested | |
1170 | * | |
1171 | * We may be late, in which case we just return. | |
1172 | */ | |
1173 | static void stall(struct thr_info *tip, long long oclock) | |
1174 | { | |
1175 | struct timespec req; | |
1176 | long long dreal, tclock = gettime() - rgenesis; | |
1177 | ||
1178 | if (verbose > 1) | |
1179 | fprintf(tip->vfp, " stall(%lld.%09lld, %lld.%09lld)\n", | |
1180 | du64_to_sec(oclock), du64_to_nsec(oclock), | |
1181 | du64_to_sec(tclock), du64_to_nsec(tclock)); | |
1182 | ||
1183 | while (!is_send_done(tip) && tclock < oclock) { | |
1184 | dreal = oclock - tclock; | |
1185 | req.tv_sec = dreal / (1000 * 1000 * 1000); | |
1186 | req.tv_nsec = dreal % (1000 * 1000 * 1000); | |
1187 | ||
1188 | if (verbose > 1) { | |
1189 | fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n", | |
1190 | (long long)req.tv_sec, | |
1191 | (long long)req.tv_nsec); | |
1192 | } | |
1193 | ||
1194 | if (nanosleep(&req, NULL) < 0 && signal_done) | |
1195 | break; | |
1196 | ||
1197 | tclock = gettime() - rgenesis; | |
1198 | } | |
1199 | } | |
1200 | ||
1201 | /** | |
1202 | * iocbs_map - Map a set of AIOs onto a set of IOCBs | |
1203 | * @tip: Per-thread information | |
1204 | * @list: List of AIOs created | |
1205 | * @pkts: AIOs to map | |
1206 | * @ntodo: Number of AIOs to map | |
1207 | */ | |
1208 | static void iocbs_map(struct thr_info *tip, struct iocb **list, | |
1209 | struct io_pkt *pkts, int ntodo) | |
1210 | { | |
1211 | int i; | |
1212 | struct io_pkt *pkt; | |
1213 | ||
1214 | assert(0 < ntodo && ntodo <= naios); | |
1215 | ||
1216 | pthread_mutex_lock(&tip->mutex); | |
1217 | assert(ntodo <= list_len(&tip->free_iocbs)); | |
1218 | for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) { | |
1219 | __u32 rw = pkt->rw; | |
1220 | struct iocb_pkt *iocbp; | |
1221 | ||
1222 | if (!pkt->rw && !write_enabled) | |
1223 | rw = 1; | |
1224 | ||
1225 | if (verbose > 1) | |
1226 | fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n", | |
1227 | (unsigned long long)pkt->sector, | |
1228 | (unsigned long long)pkt->nbytes / nb_sec, | |
1229 | rw ? 'R' : 'W', | |
1230 | (rw == 1 && pkt->rw == 0) ? '!' : ' '); | |
1231 | ||
1232 | iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head); | |
1233 | iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec); | |
1234 | ||
1235 | list_move_tail(&iocbp->head, &tip->used_iocbs); | |
1236 | list[i] = &iocbp->iocb; | |
1237 | } | |
1238 | ||
1239 | tip->naios_free -= ntodo; | |
1240 | assert(tip->naios_free >= 0); | |
1241 | pthread_mutex_unlock(&tip->mutex); | |
1242 | } | |
1243 | ||
1244 | /** | |
1245 | * process_bunch - Process a bunch of requests | |
1246 | * @tip: Per-thread information | |
1247 | * @bunch: Bunch to process | |
1248 | */ | |
1249 | static void process_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1250 | { | |
1251 | __u64 i = 0; | |
1252 | struct iocb *list[bunch->hdr.npkts]; | |
1253 | ||
1254 | assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS); | |
1255 | while (!is_send_done(tip) && (i < bunch->hdr.npkts)) { | |
1256 | long ndone; | |
1257 | int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i); | |
1258 | ||
1259 | assert(0 < ntodo && ntodo <= naios); | |
1260 | iocbs_map(tip, list, &bunch->pkts[i], ntodo); | |
1261 | if (!no_stalls) | |
1262 | stall(tip, bunch->hdr.time_stamp - genesis); | |
1263 | ||
1264 | if (ntodo) { | |
1265 | if (verbose > 1) | |
1266 | fprintf(tip->vfp, "submit(%d)\n", ntodo); | |
1267 | ndone = io_submit(tip->ctx, ntodo, list); | |
1268 | if (ndone != (long)ntodo) { | |
1269 | fatal("io_submit", ERR_SYSCALL, | |
1270 | "%d: io_submit(%d:%ld) failed (%s)\n", | |
1271 | tip->cpu, ntodo, ndone, | |
1272 | strerror(labs(ndone))); | |
1273 | /*NOTREACHED*/ | |
1274 | } | |
1275 | ||
1276 | pthread_mutex_lock(&tip->mutex); | |
1277 | tip->naios_out += ndone; | |
1278 | assert(tip->naios_out <= naios); | |
1279 | if (tip->reap_wait) { | |
1280 | tip->reap_wait = 0; | |
1281 | pthread_cond_signal(&tip->cond); | |
1282 | } | |
1283 | pthread_mutex_unlock(&tip->mutex); | |
1284 | ||
1285 | i += ndone; | |
1286 | assert(i <= bunch->hdr.npkts); | |
1287 | } | |
1288 | } | |
1289 | } | |
1290 | ||
1291 | /** | |
1292 | * reset_input_file - Reset the input file for the next iteration | |
1293 | * @tip: Thread information | |
1294 | * | |
1295 | * We also do a dummy read of the file header to get us to the first bunch. | |
1296 | */ | |
1297 | static void reset_input_file(struct thr_info *tip) | |
1298 | { | |
1299 | struct io_file_hdr hdr; | |
1300 | ||
1301 | lseek(tip->ifd, 0, 0); | |
1302 | ||
1303 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
1304 | fatal(tip->file_name, ERR_ARGS, "Header reread failed\n"); | |
1305 | /*NOTREACHED*/ | |
1306 | } | |
1307 | } | |
1308 | ||
1309 | /** | |
1310 | * replay_sub - Worker thread to submit AIOs that are being replayed | |
1311 | */ | |
1312 | static void *replay_sub(void *arg) | |
1313 | { | |
1314 | char path[MAXPATHLEN]; | |
1315 | struct io_bunch bunch; | |
1316 | struct thr_info *tip = arg; | |
1317 | ||
1318 | pin_to_cpu(tip); | |
1319 | ||
1320 | sprintf(path, "/dev/%s", map_dev(tip->devnm)); | |
1321 | tip->ofd = open(path, O_RDWR | O_DIRECT); | |
1322 | if (tip->ofd < 0) { | |
1323 | fatal(path, ERR_SYSCALL, "Failed device open\n"); | |
1324 | /*NOTREACHED*/ | |
1325 | } | |
1326 | ||
1327 | set_replay_ready(); | |
1328 | while (!is_send_done(tip) && tip->iterations--) { | |
1329 | wait_iter_start(); | |
cbb3e69e | 1330 | if (verbose > 1) |
d47a3fec AB |
1331 | fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations); |
1332 | while (!is_send_done(tip) && next_bunch(tip, &bunch)) | |
1333 | process_bunch(tip, &bunch); | |
1334 | set_iter_done(); | |
1335 | reset_input_file(tip); | |
1336 | } | |
1337 | tip->send_done = 1; | |
1338 | set_replay_done(); | |
1339 | ||
1340 | return NULL; | |
1341 | } | |
1342 | ||
1343 | /* | |
1344 | * ======================================================================== | |
1345 | * ==== COMMAND LINE ARGUMENT HANDLING ==================================== | |
1346 | * ======================================================================== | |
1347 | */ | |
1348 | ||
/*
 * Option summary printed as part of the usage message: one line per
 * option, short form then long form then default. Adjacent string
 * literals are concatenated by the compiler, so no line continuations
 * are needed.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus> ] Default: 1\n"
	"\t[ -d <dir> : --input-directory=<dir> ] Default: .\n"
	"\t[ -F : --find-records ] Default: Off\n"
	"\t[ -h : --help ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base> ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters> ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file> ] Default: None\n"
	"\t[ -N : --no-stalls ] Default: Off\n"
	"\t[ -v : --verbose ] Default: Off\n"
	"\t[ -V : --version ] Default: Off\n"
	"\t[ -W : --write-enable ] Default: Off\n"
	"\t<dev...> Default: None\n"
	"\n";
1364 | ||
/*
 * Short option string handed to getopt_long(). Every letter here has a
 * matching case in handle_args() and a matching long option below; a
 * trailing ':' means the option takes an argument.
 */
#define S_OPTS	"c:d:Fhi:I:M:NvVW"

/*
 * Long options, each mapped onto its short-option letter. The table is
 * terminated by an all-zero entry.
 */
static struct option l_opts[] = {
	/* name,		has_arg,		flag,	val */
	{ "cpus",		required_argument,	NULL,	'c' },
	{ "input-directory",	required_argument,	NULL,	'd' },
	{ "find-records",	no_argument,		NULL,	'F' },
	{ "help",		no_argument,		NULL,	'h' },
	{ "input-base",		required_argument,	NULL,	'i' },
	{ "iterations",		required_argument,	NULL,	'I' },
	{ "map-devs",		required_argument,	NULL,	'M' },
	{ "no-stalls",		no_argument,		NULL,	'N' },
	{ "verbose",		no_argument,		NULL,	'v' },
	{ "version",		no_argument,		NULL,	'V' },
	{ "write-enable",	no_argument,		NULL,	'W' },
	{ NULL,			0,			NULL,	0 }
};
1437 | ||
1438 | /** | |
1439 | * handle_args: Parse passed in argument list | |
1440 | * @argc: Number of arguments in argv | |
1441 | * @argv: Arguments passed in | |
1442 | * | |
1443 | * Does rudimentary parameter verification as well. | |
1444 | */ | |
1445 | static void handle_args(int argc, char *argv[]) | |
1446 | { | |
1447 | int c; | |
1448 | ||
1449 | while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) { | |
1450 | switch (c) { | |
1451 | case 'c': | |
1452 | cpus_to_use = atoi(optarg); | |
1453 | if (cpus_to_use <= 0 || cpus_to_use > ncpus) { | |
1454 | fatal(NULL, ERR_ARGS, | |
1455 | "Invalid number of cpus %d (0<x<%d)\n", | |
1456 | cpus_to_use, ncpus); | |
1457 | /*NOTREACHED*/ | |
1458 | } | |
1459 | break; | |
1460 | ||
1461 | case 'd': | |
1462 | idir = optarg; | |
1463 | if (access(idir, R_OK | X_OK) != 0) { | |
1464 | fatal(idir, ERR_ARGS, | |
1465 | "Invalid input directory specified\n"); | |
1466 | /*NOTREACHED*/ | |
1467 | } | |
1468 | break; | |
1469 | ||
1470 | case 'F': | |
1471 | find_records = 1; | |
1472 | break; | |
1473 | ||
1474 | case 'h': | |
1475 | usage(); | |
1476 | exit(0); | |
1477 | /*NOTREACHED*/ | |
1478 | ||
1479 | case 'i': | |
1480 | ibase = optarg; | |
1481 | break; | |
1482 | ||
1483 | case 'I': | |
1484 | def_iterations = atoi(optarg); | |
1485 | if (def_iterations <= 0) { | |
1486 | fprintf(stderr, | |
1487 | "Invalid number of iterations %d\n", | |
1488 | def_iterations); | |
1489 | exit(ERR_ARGS); | |
1490 | /*NOTREACHED*/ | |
1491 | } | |
1492 | break; | |
1493 | ||
1494 | case 'M': | |
1495 | read_map_devs(optarg); | |
1496 | break; | |
1497 | ||
1498 | case 'N': | |
1499 | no_stalls = 1; | |
1500 | break; | |
1501 | ||
1502 | case 'V': | |
1503 | fprintf(stderr, "btreplay -- version %s\n", | |
1504 | my_btversion); | |
1505 | fprintf(stderr, " Built on %s\n", | |
1506 | build_date); | |
1507 | exit(0); | |
1508 | /*NOTREACHED*/ | |
1509 | ||
1510 | case 'v': | |
1511 | verbose++; | |
1512 | break; | |
1513 | ||
1514 | case 'W': | |
1515 | write_enabled = 1; | |
1516 | break; | |
1517 | ||
1518 | default: | |
1519 | usage(); | |
1520 | fatal(NULL, ERR_ARGS, | |
1521 | "Invalid command line argument %c\n", c); | |
1522 | /*NOTREACHED*/ | |
1523 | } | |
1524 | } | |
1525 | ||
1526 | while (optind < argc) | |
1527 | add_input_dev(argv[optind++]); | |
1528 | ||
1529 | if (find_records) | |
1530 | find_input_devs(idir); | |
1531 | ||
1532 | if (list_len(&input_devs) == 0) { | |
1533 | fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n"); | |
1534 | /*NOTREACHED*/ | |
1535 | } | |
1536 | ||
1537 | if (cpus_to_use < 0) | |
1538 | cpus_to_use = ncpus; | |
1539 | } | |
1540 | ||
1541 | /* | |
1542 | * ======================================================================== | |
1543 | * ==== MAIN ROUTINE ====================================================== | |
1544 | * ======================================================================== | |
1545 | */ | |
1546 | ||
1547 | /** | |
1548 | * set_signal_done - Signal handler, catches signals & sets signal_done | |
1549 | */ | |
1550 | static void set_signal_done(__attribute__((__unused__))int signum) | |
1551 | { | |
1552 | signal_done = 1; | |
1553 | } | |
1554 | ||
1555 | /** | |
1556 | * main - | |
1557 | * @argc: Number of arguments | |
1558 | * @argv: Array of arguments | |
1559 | */ | |
1560 | int main(int argc, char *argv[]) | |
1561 | { | |
1562 | int i; | |
1563 | struct list_head *p; | |
1564 | ||
1565 | pgsize = getpagesize(); | |
1566 | assert(pgsize > 0); | |
1567 | ||
1568 | setup_signal(SIGINT, set_signal_done); | |
1569 | setup_signal(SIGTERM, set_signal_done); | |
1570 | ||
1571 | get_ncpus(); | |
1572 | handle_args(argc, argv); | |
1573 | find_input_files(); | |
1574 | ||
1575 | nfiles = list_len(&input_files); | |
1576 | __list_for_each(p, &input_files) { | |
1577 | tip_init(list_entry(p, struct thr_info, head)); | |
1578 | } | |
1579 | ||
1580 | wait_replays_ready(); | |
1581 | for (i = 0; i < def_iterations; i++) { | |
1582 | rgenesis = gettime(); | |
1583 | start_iter(); | |
1584 | if (verbose) | |
1585 | fprintf(stderr, "I"); | |
1586 | wait_iters_done(); | |
1587 | } | |
1588 | ||
1589 | wait_replays_done(); | |
1590 | wait_reclaims_done(); | |
1591 | ||
1592 | if (verbose) | |
1593 | fprintf(stderr, "\n"); | |
1594 | ||
1595 | rem_input_files(); | |
1596 | release_map_devs(); | |
1597 | ||
1598 | return 0; | |
1599 | } |