Commit | Line | Data |
---|---|---|
d47a3fec AB |
1 | /* |
2 | * Blktrace replay utility - Play traces back | |
3 | * | |
4 | * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 | */ | |
20 | ||
/* Compile-time stamp of this build: "<date> at <time>" */
static char build_date[] = __DATE__ " at " __TIME__;
22 | ||
23 | #include <assert.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <libaio.h> | |
27 | #include <pthread.h> | |
28 | #include <sched.h> | |
29 | #include <signal.h> | |
30 | #include <stdio.h> | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | #include <time.h> | |
34 | #include <unistd.h> | |
35 | #include <sys/param.h> | |
36 | #include <sys/stat.h> | |
37 | #include <sys/time.h> | |
38 | #include <sys/types.h> | |
39 | #include <dirent.h> | |
40 | ||
41 | #if !defined(_GNU_SOURCE) | |
42 | # define _GNU_SOURCE | |
43 | #endif | |
44 | #include <getopt.h> | |
45 | ||
46 | #include "list.h" | |
47 | #include "btrecord.h" | |
48 | ||
49 | /* | |
50 | * ======================================================================== | |
51 | * ==== STRUCTURE DEFINITIONS ============================================= | |
52 | * ======================================================================== | |
53 | */ | |
54 | ||
55 | /** | |
56 | * Each device map has one of these: | |
57 | * | |
58 | * @head: Linked on to map_devs | |
59 | * @from_dev: Device name as seen on recorded system | |
60 | * @to_dev: Device name to be used on replay system | |
61 | */ | |
62 | struct map_dev { | |
63 | struct list_head head; | |
64 | char *from_dev, *to_dev; | |
65 | }; | |
66 | ||
67 | /** | |
68 | * Each device name specified has one of these (until threads are created) | |
69 | * | |
70 | * @head: Linked onto input_devs | |
71 | * @devnm: Device name -- 'sd*' | |
72 | */ | |
73 | struct dev_info { | |
74 | struct list_head head; | |
75 | char *devnm; | |
76 | }; | |
77 | ||
78 | /* | |
79 | * Per input file information | |
80 | * | |
81 | * @head: Used to link up on input_files | |
82 | * @free_iocbs: List of free iocb's available for use | |
83 | * @used_iocbs: List of iocb's currently outstanding | |
84 | * @mutex: Mutex used with condition variable to protect volatile values | |
85 | * @cond: Condition variable used when waiting on a volatile value change | |
86 | * @naios_out: Current number of AIOs outstanding on this context | |
87 | * @naios_free: Number of AIOs on the free list (short cut for list_len) | |
88 | * @send_wait: Boolean: When true, the sub thread is waiting on free IOCBs | |
89 | * @reap_wait: Boolean: When true, the rec thread is waiting on used IOCBs | |
90 | * @send_done: Boolean: When true, the sub thread has completed work | |
91 | * @reap_done: Boolean: When true, the rec thread has completed work | |
92 | * @sub_thread: Thread used to submit IOs. | |
93 | * @rec_thread: Thread used to reclaim IOs. | |
94 | * @ctx: IO context | |
95 | * @devnm: Copy of the device name being managed by this thread | |
96 | * @file_name: Full name of the input file | |
97 | * @cpu: CPU this thread is pinned to | |
98 | * @ifd: Input file descriptor | |
99 | * @ofd: Output file descriptor | |
100 | * @iterations: Remaining iterations to process | |
101 | * @vfp: For verbose dumping of actions performed | |
102 | */ | |
103 | struct thr_info { | |
104 | struct list_head head, free_iocbs, used_iocbs; | |
105 | pthread_mutex_t mutex; | |
106 | pthread_cond_t cond; | |
107 | volatile long naios_out, naios_free; | |
108 | volatile int send_wait, reap_wait, send_done, reap_done; | |
109 | pthread_t sub_thread, rec_thread; | |
110 | io_context_t ctx; | |
111 | char *devnm, *file_name; | |
112 | int cpu, ifd, ofd, iterations; | |
113 | FILE *vfp; | |
114 | }; | |
115 | ||
116 | /* | |
117 | * Every Asynchronous IO used has one of these (naios per file/device). | |
118 | * | |
119 | * @iocb: IOCB sent down via io_submit | |
120 | * @head: Linked onto file_list.free_iocbs or file_list.used_iocbs | |
121 | * @tip: Pointer to per-thread information this IO is associated with | |
122 | * @nbytes: Number of bytes in buffer associated with iocb | |
123 | */ | |
124 | struct iocb_pkt { | |
125 | struct iocb iocb; | |
126 | struct list_head head; | |
127 | struct thr_info *tip; | |
128 | int nbytes; | |
129 | }; | |
130 | ||
131 | /* | |
132 | * ======================================================================== | |
133 | * ==== GLOBAL VARIABLES ================================================== | |
134 | * ======================================================================== | |
135 | */ | |
136 | ||
137 | static volatile int signal_done = 0; // Boolean: Signal'ed, need to quit | |
138 | ||
139 | static char *ibase = "replay"; // Input base name | |
140 | static char *idir = "."; // Input directory base | |
141 | static int cpus_to_use = -1; // Number of CPUs to use | |
142 | static int def_iterations = 1; // Default number of iterations | |
143 | static int naios = 512; // Number of AIOs per thread | |
144 | static int ncpus = 0; // Number of CPUs in the system | |
145 | static int verbose = 0; // Boolean: Output some extra info | |
146 | static int write_enabled = 0; // Boolean: Enable writing | |
147 | static __u64 genesis = ~0; // Earliest time seen | |
148 | static __u64 rgenesis; // Our start time | |
149 | static size_t pgsize; // System Page size | |
150 | static int nb_sec = 512; // Number of bytes per sector | |
151 | static LIST_HEAD(input_devs); // List of devices to handle | |
152 | static LIST_HEAD(input_files); // List of input files to handle | |
153 | static LIST_HEAD(map_devs); // List of device maps | |
154 | static int nfiles = 0; // Number of files to handle | |
155 | static int no_stalls = 0; // Boolean: Disable pre-stalls | |
156 | static int find_records = 0; // Boolean: Find record files auto | |
157 | ||
/*
 * Counters/flags managed under control of condition variables. Each one
 * comes with its own mutex and condition variable.
 */

/* n_reclaims_done: Counts number of reclaim threads that have completed. */
static volatile int	n_reclaims_done = 0;
static pthread_mutex_t	reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	reclaim_done_cond = PTHREAD_COND_INITIALIZER;

/* n_replays_done: Counts number of replay threads that have completed. */
static volatile int	n_replays_done = 0;
static pthread_mutex_t	replay_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	replay_done_cond = PTHREAD_COND_INITIALIZER;

/* n_replays_ready: Counts number of replay threads ready to start. */
static volatile int	n_replays_ready = 0;
static pthread_mutex_t	replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	replay_ready_cond = PTHREAD_COND_INITIALIZER;

/* n_iters_done: Counts number of replay threads done one iteration. */
static volatile int	n_iters_done = 0;
static pthread_mutex_t	iter_done_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	iter_done_cond = PTHREAD_COND_INITIALIZER;

/* iter_start: Starts an iteration for the replay threads. */
static volatile int	iter_start = 0;
static pthread_mutex_t	iter_start_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	iter_start_cond = PTHREAD_COND_INITIALIZER;
186 | ||
187 | /* | |
188 | * ======================================================================== | |
189 | * ==== FORWARD REFERENCES ================================================ | |
190 | * ======================================================================== | |
191 | */ | |
192 | ||
/* Thread start routines handed to pthread_create() in tip_init() */
static void *replay_sub(void *arg);
static void *replay_rec(void *arg);

/* Usage text printed by usage() */
static char usage_str[];
196 | ||
197 | /* | |
198 | * ======================================================================== | |
199 | * ==== INLINE ROUTINES =================================================== | |
200 | * ======================================================================== | |
201 | */ | |
202 | ||
/*
 * fatal - report an error and terminate the process.
 *
 * If errstring is non-NULL it is passed to perror() first. The remaining
 * arguments (a printf-style format plus its values) are printed to stderr,
 * and the process then exits with status exitval.
 */
#define ERR_ARGS	1
#define ERR_SYSCALL	2
#define fatal(errstring, exitval, ...)					\
	do {								\
		if (errstring)						\
			perror(errstring);				\
		fprintf(stderr, ##__VA_ARGS__);				\
		exit(exitval);						\
		/*NOTREACHED*/						\
	} while (0)
217 | ||
/**
 * du64_to_sec - Whole-second part of a nanosecond count
 */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	const long long unsigned ns_per_sec = 1000 * 1000 * 1000;
	long long unsigned ns = (long long unsigned)du64;

	return ns / ns_per_sec;
}
222 | ||
/**
 * du64_to_nsec - Sub-second remainder (in nanoseconds) of a nanosecond count
 *
 * The value is reinterpreted as signed and its magnitude is used.
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return magnitude % (1000 * 1000 * 1000);
}
227 | ||
/**
 * min - Return the smaller of two ints
 */
static inline int min(int a, int b)
{
	if (b < a)
		return b;
	return a;
}
235 | ||
/**
 * minl - Return the smaller of two longs
 */
static inline long minl(long a, long b)
{
	if (b < a)
		return b;
	return a;
}
243 | ||
244 | /** | |
245 | * usage - Display usage string and version | |
246 | */ | |
247 | static inline void usage(void) | |
248 | { | |
249 | fprintf(stderr, "Usage: btreplay -- version %s\n%s", | |
250 | my_btversion, usage_str); | |
251 | } | |
252 | ||
253 | /** | |
254 | * is_send_done - Returns true if sender should quit early | |
255 | * @tip: Per-thread information | |
256 | */ | |
257 | static inline int is_send_done(struct thr_info *tip) | |
258 | { | |
259 | return signal_done || tip->send_done; | |
260 | } | |
261 | ||
262 | /** | |
263 | * is_reap_done - Returns true if reaper should quit early | |
264 | * @tip: Per-thread information | |
265 | */ | |
266 | static inline int is_reap_done(struct thr_info *tip) | |
267 | { | |
268 | return tip->send_done && tip->naios_out == 0; | |
269 | } | |
270 | ||
/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts: Time as seconds + nanoseconds
 */
#define NS_TICKS	((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	return ((__u64)(ts->tv_sec) * NS_TICKS) + (__u64)(ts->tv_nsec);
}

/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time as seconds + microseconds
 *
 * The seconds field has to be scaled by NS_TICKS just like in ts2ns();
 * otherwise raw seconds would be added to nanoseconds.
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	return ((__u64)(tp->tv_sec) * NS_TICKS) +
	       ((__u64)(tp->tv_usec) * (__u64)1000);
}
287 | ||
288 | /** | |
289 | * touch_memory - Force physical memory to be allocating it | |
290 | * | |
291 | * For malloc()ed memory we need to /touch/ it to make it really | |
292 | * exist. Otherwise, for write's (to storage) things may not work | |
293 | * as planned - we see Linux just use a single area to /read/ from | |
294 | * (as there isn't any memory that has been associated with the | |
295 | * allocated virtual addresses yet). | |
296 | */ | |
297 | static inline void touch_memory(char *buf, size_t bsize) | |
298 | { | |
299 | #if defined(PREP_BUFS) | |
300 | memset(buf, 0, bsize); | |
301 | #else | |
302 | size_t i; | |
303 | ||
304 | for (i = 0; i < bsize; i += pgsize) | |
305 | buf[i] = 0; | |
306 | #endif | |
307 | } | |
308 | ||
309 | /** | |
310 | * buf_alloc - Returns a page-aligned buffer of the specified size | |
311 | * @nbytes: Number of bytes to allocate | |
312 | */ | |
313 | static inline void *buf_alloc(size_t nbytes) | |
314 | { | |
315 | void *buf; | |
316 | ||
317 | if (posix_memalign(&buf, pgsize, nbytes)) { | |
318 | fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n"); | |
319 | /*NOTREACHED*/ | |
320 | } | |
321 | ||
322 | return buf; | |
323 | } | |
324 | ||
325 | /** | |
326 | * gettime - Returns current time | |
327 | */ | |
328 | static inline __u64 gettime(void) | |
329 | { | |
330 | static int use_clock_gettime = -1; // Which clock to use | |
331 | ||
332 | if (use_clock_gettime < 0) { | |
333 | use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0; | |
334 | if (use_clock_gettime) { | |
335 | struct timespec ts = { | |
336 | .tv_sec = 0, | |
337 | .tv_nsec = 0 | |
338 | }; | |
339 | clock_settime(CLOCK_MONOTONIC, &ts); | |
340 | } | |
341 | } | |
342 | ||
343 | if (use_clock_gettime) { | |
344 | struct timespec ts; | |
345 | clock_gettime(CLOCK_MONOTONIC, &ts); | |
346 | return ts2ns(&ts); | |
347 | } | |
348 | else { | |
349 | struct timeval tp; | |
350 | gettimeofday(&tp, NULL); | |
351 | return tv2ns(&tp); | |
352 | } | |
353 | } | |
354 | ||
355 | /** | |
356 | * setup_signal - Set up a signal handler for the specified signum | |
357 | */ | |
358 | static inline void setup_signal(int signum, sighandler_t handler) | |
359 | { | |
360 | if (signal(signum, handler) == SIG_ERR) { | |
361 | fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n", | |
362 | signum); | |
363 | /*NOTREACHED*/ | |
364 | } | |
365 | } | |
366 | ||
367 | /* | |
368 | * ======================================================================== | |
369 | * ==== CONDITION VARIABLE ROUTINES ======================================= | |
370 | * ======================================================================== | |
371 | */ | |
372 | ||
/**
 * __set_cv - Bump a counter that is guarded by a mutex/condvar pair
 * @pmp: Pointer to the associated mutex
 * @pcp: Pointer to the associated condition variable
 * @vp:  Pointer to the variable being incremented
 * @mxv: Upper bound for the variable (only checked when asserts are on)
 *
 * The condition variable is signalled while the mutex is still held.
 */
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);
	{
		assert(*vp < mxv);
		(*vp)++;
		pthread_cond_signal(pcp);
	}
	pthread_mutex_unlock(pmp);
}
390 | ||
/**
 * __wait_cv - Block until a guarded counter reaches a value, then reset it
 * @pmp: Pointer to the associated mutex
 * @pcp: Pointer to the associated condition variable
 * @vp:  Pointer to the variable being watched
 * @mxv: Value to wait for
 *
 * The counter is set back to zero before the mutex is released.
 */
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);
	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}
	*vp = 0;
	pthread_mutex_unlock(pmp);
}
407 | ||
408 | static inline void set_reclaim_done(void) | |
409 | { | |
410 | __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
411 | nfiles); | |
412 | } | |
413 | ||
414 | static inline void wait_reclaims_done(void) | |
415 | { | |
416 | __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
417 | nfiles); | |
418 | } | |
419 | ||
420 | static inline void set_replay_ready(void) | |
421 | { | |
422 | __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
423 | nfiles); | |
424 | } | |
425 | ||
426 | static inline void wait_replays_ready(void) | |
427 | { | |
428 | __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
429 | nfiles); | |
430 | } | |
431 | ||
432 | static inline void set_replay_done(void) | |
433 | { | |
434 | __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
435 | nfiles); | |
436 | } | |
437 | ||
438 | static inline void wait_replays_done(void) | |
439 | { | |
440 | __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
441 | nfiles); | |
442 | } | |
443 | ||
444 | static inline void set_iter_done(void) | |
445 | { | |
446 | __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
447 | nfiles); | |
448 | } | |
449 | ||
450 | static inline void wait_iters_done(void) | |
451 | { | |
452 | __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
453 | nfiles); | |
454 | } | |
455 | ||
456 | /** | |
457 | * wait_iter_start - Wait for an iteration to start | |
458 | * | |
459 | * This is /slightly/ different: we are waiting for a value to become | |
460 | * non-zero, and then we decrement it and go on. | |
461 | */ | |
462 | static inline void wait_iter_start(void) | |
463 | { | |
464 | pthread_mutex_lock(&iter_start_mutex); | |
465 | while (iter_start == 0) | |
466 | pthread_cond_wait(&iter_start_cond, &iter_start_mutex); | |
467 | assert(1 <= iter_start && iter_start <= nfiles); | |
468 | iter_start--; | |
469 | pthread_mutex_unlock(&iter_start_mutex); | |
470 | } | |
471 | ||
472 | /** | |
473 | * start_iter - Start an iteration at the replay thread level | |
474 | */ | |
475 | static inline void start_iter(void) | |
476 | { | |
477 | pthread_mutex_lock(&iter_start_mutex); | |
478 | assert(iter_start == 0); | |
479 | iter_start = nfiles; | |
480 | pthread_cond_broadcast(&iter_start_cond); | |
481 | pthread_mutex_unlock(&iter_start_mutex); | |
482 | } | |
483 | ||
484 | /* | |
485 | * ======================================================================== | |
486 | * ==== CPU RELATED ROUTINES ============================================== | |
487 | * ======================================================================== | |
488 | */ | |
489 | ||
490 | /** | |
491 | * get_ncpus - Sets up the global 'ncpus' value | |
492 | */ | |
493 | static void get_ncpus(void) | |
494 | { | |
495 | cpu_set_t cpus; | |
496 | ||
497 | if (sched_getaffinity(getpid(), sizeof(cpus), &cpus)) { | |
498 | fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n"); | |
499 | /*NOTREACHED*/ | |
500 | } | |
501 | ||
502 | /* | |
503 | * XXX This assumes (perhaps wrongly) that there are no /holes/ | |
504 | * XXX in the mask. | |
505 | */ | |
506 | for (ncpus = 0; ncpus < CPU_SETSIZE && CPU_ISSET(ncpus, &cpus); ncpus++) | |
507 | ; | |
508 | if (ncpus == 0) { | |
509 | fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n"); | |
510 | /*NOTREACHED*/ | |
511 | } | |
512 | } | |
513 | ||
514 | /** | |
515 | * pin_to_cpu - Pin this thread to a specific CPU | |
516 | * @tip: Thread information | |
517 | */ | |
518 | static void pin_to_cpu(struct thr_info *tip) | |
519 | { | |
520 | cpu_set_t cpus; | |
521 | ||
522 | assert(0 <= tip->cpu && tip->cpu < ncpus); | |
523 | ||
524 | CPU_ZERO(&cpus); | |
525 | CPU_SET(tip->cpu, &cpus); | |
526 | if (sched_setaffinity(getpid(), sizeof(cpus), &cpus)) { | |
527 | fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n"); | |
528 | /*NOTREACHED*/ | |
529 | } | |
530 | ||
531 | if (verbose > 1) { | |
532 | int i; | |
533 | cpu_set_t now; | |
534 | ||
535 | (void)sched_getaffinity(getpid(), sizeof(now), &now); | |
536 | fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu); | |
537 | for (i = 0; i < ncpus; i++) | |
538 | fprintf(tip->vfp, "%1d", CPU_ISSET(i, &now)); | |
539 | fprintf(tip->vfp, "\n"); | |
540 | } | |
541 | } | |
542 | ||
543 | /* | |
544 | * ======================================================================== | |
545 | * ==== INPUT DEVICE HANDLERS ============================================= | |
546 | * ======================================================================== | |
547 | */ | |
548 | ||
549 | /** | |
550 | * add_input_dev - Add a device ('sd*') to the list of devices to handle | |
551 | */ | |
552 | static void add_input_dev(char *devnm) | |
553 | { | |
554 | struct list_head *p; | |
555 | struct dev_info *dip; | |
556 | ||
557 | __list_for_each(p, &input_devs) { | |
558 | dip = list_entry(p, struct dev_info, head); | |
559 | if (strcmp(dip->devnm, devnm) == 0) | |
560 | return; | |
561 | } | |
562 | ||
563 | dip = malloc(sizeof(*dip)); | |
564 | dip->devnm = strdup(devnm); | |
565 | list_add_tail(&dip->head, &input_devs); | |
566 | } | |
567 | ||
568 | /** | |
569 | * rem_input_dev - Remove resources associated with this device | |
570 | */ | |
571 | static void rem_input_dev(struct dev_info *dip) | |
572 | { | |
573 | list_del(&dip->head); | |
574 | free(dip->devnm); | |
575 | free(dip); | |
576 | } | |
577 | ||
578 | static void find_input_devs(char *idir) | |
579 | { | |
580 | struct dirent *ent; | |
581 | DIR *dir = opendir(idir); | |
582 | ||
583 | if (dir == NULL) { | |
584 | fatal(idir, ERR_ARGS, "Unable to open %s\n", idir); | |
585 | /*NOTREACHED*/ | |
586 | } | |
587 | ||
588 | while ((ent = readdir(dir)) != NULL) { | |
589 | char *p, *dsf = malloc(256); | |
590 | ||
591 | if (strstr(ent->d_name, ".replay.") == NULL) | |
592 | continue; | |
593 | ||
594 | dsf = strdup(ent->d_name); | |
595 | p = index(dsf, '.'); | |
596 | assert(p != NULL); | |
597 | *p = '\0'; | |
598 | add_input_dev(dsf); | |
599 | free(dsf); | |
600 | } | |
601 | ||
602 | closedir(dir); | |
603 | } | |
604 | ||
605 | /* | |
606 | * ======================================================================== | |
607 | * ==== MAP DEVICE INTERFACES ============================================= | |
608 | * ======================================================================== | |
609 | */ | |
610 | ||
611 | /** | |
612 | * read_map_devs - Read in a set of device mapping from the provided file. | |
613 | * @file_name: File containing device maps | |
614 | * | |
615 | * We support the notion of multiple such files being specifed on the cmd line | |
616 | */ | |
617 | static void read_map_devs(char *file_name) | |
618 | { | |
619 | FILE *fp; | |
620 | char *from_dev, *to_dev; | |
621 | ||
622 | fp = fopen(file_name, "r"); | |
623 | if (!fp) { | |
624 | fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n"); | |
625 | /*NOTREACHED*/ | |
626 | } | |
627 | ||
628 | while (fscanf(fp, "%as %as", &from_dev, &to_dev) == 2) { | |
629 | struct map_dev *mdp = malloc(sizeof(*mdp)); | |
630 | ||
631 | mdp->from_dev = from_dev; | |
632 | mdp->to_dev = to_dev; | |
633 | list_add_tail(&mdp->head, &map_devs); | |
634 | } | |
635 | ||
636 | fclose(fp); | |
637 | } | |
638 | ||
639 | /** | |
640 | * release_map_devs - Release resources associated with device mappings. | |
641 | */ | |
642 | static void release_map_devs(void) | |
643 | { | |
644 | struct list_head *p, *q; | |
645 | ||
646 | list_for_each_safe(p, q, &map_devs) { | |
647 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
648 | ||
649 | list_del(&mdp->head); | |
650 | ||
651 | free(mdp->from_dev); | |
652 | free(mdp->to_dev); | |
653 | free(mdp); | |
654 | } | |
655 | } | |
656 | ||
657 | /** | |
658 | * map_dev - Return the mapped device for that specified | |
659 | * @from_dev: Device name as seen on recorded system | |
660 | * | |
661 | * Note: If there is no such mapping, we return the same name. | |
662 | */ | |
663 | static char *map_dev(char *from_dev) | |
664 | { | |
665 | struct list_head *p; | |
666 | ||
667 | __list_for_each(p, &map_devs) { | |
668 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
669 | ||
670 | if (strcmp(from_dev, mdp->from_dev) == 0) | |
671 | return mdp->to_dev; | |
672 | } | |
673 | ||
674 | return from_dev; | |
675 | } | |
676 | ||
677 | /* | |
678 | * ======================================================================== | |
679 | * ==== IOCB MANAGEMENT ROUTINES ========================================== | |
680 | * ======================================================================== | |
681 | */ | |
682 | ||
683 | /** | |
684 | * iocb_init - Initialize the fields of an IOCB | |
685 | * @tip: Per-thread information | |
686 | * iocbp: IOCB pointer to update | |
687 | */ | |
688 | static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp) | |
689 | { | |
690 | iocbp->tip = tip; | |
691 | iocbp->nbytes = 0; | |
692 | iocbp->iocb.u.c.buf = NULL; | |
693 | } | |
694 | ||
695 | /** | |
696 | * iocb_setup - Set up an iocb with this AIOs information | |
697 | * @iocbp: IOCB pointer to update | |
698 | * @rw: Direction (0 == write, 1 == read) | |
699 | * @n: Number of bytes to transfer | |
700 | * @off: Offset (in bytes) | |
701 | */ | |
702 | static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off) | |
703 | { | |
704 | char *buf; | |
705 | struct iocb *iop = &iocbp->iocb; | |
706 | ||
707 | assert(rw == 0 || rw == 1); | |
708 | assert(0 < n && (n % nb_sec) == 0); | |
709 | assert(0 <= off); | |
710 | ||
711 | if (iocbp->nbytes) { | |
712 | if (iocbp->nbytes >= n) { | |
713 | buf = iop->u.c.buf; | |
714 | goto prep; | |
715 | } | |
716 | ||
717 | assert(iop->u.c.buf); | |
718 | free(iop->u.c.buf); | |
719 | } | |
720 | ||
721 | buf = buf_alloc(n); | |
722 | iocbp->nbytes = n; | |
723 | ||
724 | prep: | |
725 | if (rw) | |
726 | io_prep_pread(iop, iocbp->tip->ofd, buf, n, off); | |
727 | else { | |
728 | assert(write_enabled); | |
729 | io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off); | |
730 | touch_memory(buf, n); | |
731 | } | |
732 | ||
733 | iop->data = iocbp; | |
734 | } | |
735 | ||
736 | /* | |
737 | * ======================================================================== | |
738 | * ==== PER-THREAD SET UP & TEAR DOWN ===================================== | |
739 | * ======================================================================== | |
740 | */ | |
741 | ||
742 | /** | |
743 | * tip_init - Per thread initialization function | |
744 | */ | |
745 | static void tip_init(struct thr_info *tip) | |
746 | { | |
747 | int i; | |
748 | ||
749 | INIT_LIST_HEAD(&tip->free_iocbs); | |
750 | INIT_LIST_HEAD(&tip->used_iocbs); | |
751 | ||
752 | pthread_mutex_init(&tip->mutex, NULL); | |
753 | pthread_cond_init(&tip->cond, NULL); | |
754 | ||
755 | if (io_setup(naios, &tip->ctx)) { | |
756 | fatal("io_setup", ERR_SYSCALL, "io_setup failed\n"); | |
757 | /*NOTREACHED*/ | |
758 | } | |
759 | ||
760 | tip->ofd = -1; | |
761 | tip->naios_out = 0; | |
762 | tip->send_done = tip->reap_done = 0; | |
763 | tip->send_wait = tip->reap_wait = 0; | |
764 | ||
765 | memset(&tip->sub_thread, 0, sizeof(tip->sub_thread)); | |
766 | memset(&tip->rec_thread, 0, sizeof(tip->rec_thread)); | |
767 | ||
768 | for (i = 0; i < naios; i++) { | |
769 | struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp)); | |
770 | ||
771 | iocb_init(tip, iocbp); | |
772 | list_add_tail(&iocbp->head, &tip->free_iocbs); | |
773 | } | |
774 | tip->naios_free = naios; | |
775 | ||
776 | if (verbose > 1) { | |
777 | char fn[MAXPATHLEN]; | |
778 | ||
779 | sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase, | |
780 | tip->cpu); | |
781 | tip->vfp = fopen(fn, "w"); | |
782 | if (!tip->vfp) { | |
783 | fatal(fn, ERR_SYSCALL, "Failed to open report\n"); | |
784 | /*NOTREACHED*/ | |
785 | } | |
786 | ||
787 | setlinebuf(tip->vfp); | |
788 | } | |
789 | ||
790 | if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) { | |
791 | fatal("pthread_create", ERR_SYSCALL, | |
792 | "thread create failed\n"); | |
793 | /*NOTREACHED*/ | |
794 | } | |
795 | ||
796 | if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) { | |
797 | fatal("pthread_create", ERR_SYSCALL, | |
798 | "thread create failed\n"); | |
799 | /*NOTREACHED*/ | |
800 | } | |
801 | } | |
802 | ||
803 | /** | |
804 | * tip_release - Release resources associated with this thread | |
805 | */ | |
806 | static void tip_release(struct thr_info *tip) | |
807 | { | |
808 | struct list_head *p, *q; | |
809 | ||
810 | assert(tip->send_done); | |
811 | assert(tip->reap_done); | |
812 | assert(list_len(&tip->used_iocbs) == 0); | |
813 | assert(tip->naios_free == naios); | |
814 | ||
815 | if (pthread_join(tip->sub_thread, NULL)) { | |
816 | fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n"); | |
817 | /*NOTREACHED*/ | |
818 | } | |
819 | if (pthread_join(tip->rec_thread, NULL)) { | |
820 | fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n"); | |
821 | /*NOTREACHED*/ | |
822 | } | |
823 | ||
824 | io_destroy(tip->ctx); | |
825 | ||
826 | list_splice(&tip->used_iocbs, &tip->free_iocbs); | |
827 | list_for_each_safe(p, q, &tip->free_iocbs) { | |
828 | struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head); | |
829 | ||
830 | list_del(&iocbp->head); | |
831 | if (iocbp->nbytes) | |
832 | free(iocbp->iocb.u.c.buf); | |
833 | free(iocbp); | |
834 | } | |
835 | ||
836 | pthread_cond_destroy(&tip->cond); | |
837 | pthread_mutex_destroy(&tip->mutex); | |
838 | } | |
839 | ||
840 | /** | |
841 | * add_input_file - Allocate and initialize per-input file structure | |
842 | * @cpu: CPU for this file | |
843 | * @devnm: Device name for this file | |
844 | * @file_name: Fully qualifed input file name | |
845 | */ | |
846 | static void add_input_file(int cpu, char *devnm, char *file_name) | |
847 | { | |
848 | struct stat buf; | |
849 | struct io_file_hdr hdr; | |
850 | struct thr_info *tip = buf_alloc(sizeof(*tip)); | |
851 | __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub); | |
852 | ||
853 | assert(0 <= cpu && cpu < ncpus); | |
854 | ||
855 | memset(&hdr, 0, sizeof(hdr)); | |
856 | memset(tip, 0, sizeof(*tip)); | |
857 | tip->cpu = cpu % cpus_to_use; | |
858 | tip->iterations = def_iterations; | |
859 | ||
860 | tip->ifd = open(file_name, O_RDONLY); | |
861 | if (tip->ifd < 0) { | |
862 | fatal(file_name, ERR_ARGS, "Unable to open\n"); | |
863 | /*NOTREACHED*/ | |
864 | } | |
865 | if (fstat(tip->ifd, &buf) < 0) { | |
866 | fatal(file_name, ERR_SYSCALL, "fstat failed\n"); | |
867 | /*NOTREACHED*/ | |
868 | } | |
869 | if (buf.st_size < (off_t)sizeof(hdr)) { | |
870 | if (verbose) | |
871 | fprintf(stderr, "\t%s empty\n", file_name); | |
872 | goto empty_file; | |
873 | } | |
874 | ||
875 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
876 | fatal(file_name, ERR_ARGS, "Header read failed\n"); | |
877 | /*NOTREACHED*/ | |
878 | } | |
879 | ||
880 | if (hdr.version != my_version) { | |
881 | fprintf(stderr, "%llx %llx %llx %llx\n", | |
882 | (long long unsigned)hdr.version, | |
883 | (long long unsigned)hdr.genesis, | |
884 | (long long unsigned)hdr.nbunches, | |
885 | (long long unsigned)hdr.total_pkts); | |
886 | fatal(NULL, ERR_ARGS, | |
887 | "BT version mismatch: %lx versus my %lx\n", | |
888 | (long)hdr.version, (long)my_version); | |
889 | ||
890 | } | |
891 | ||
892 | if (hdr.nbunches == 0) { | |
893 | empty_file: | |
894 | close(tip->ifd); | |
895 | free(tip); | |
896 | return; | |
897 | } | |
898 | ||
899 | if (hdr.genesis < genesis) { | |
900 | if (verbose > 1) | |
901 | fprintf(stderr, "Setting genesis to %llu.%llu\n", | |
902 | du64_to_sec(hdr.genesis), | |
903 | du64_to_nsec(hdr.genesis)); | |
904 | genesis = hdr.genesis; | |
905 | } | |
906 | ||
907 | tip->devnm = strdup(devnm); | |
908 | tip->file_name = strdup(file_name); | |
909 | ||
910 | list_add_tail(&tip->head, &input_files); | |
911 | ||
912 | if (verbose) | |
913 | fprintf(stderr, "Added %s %llu\n", file_name, | |
914 | (long long)hdr.genesis); | |
915 | } | |
916 | ||
917 | /** | |
918 | * rem_input_file - Release resources associated with an input file | |
919 | * @tip: Per-input file information | |
920 | */ | |
921 | static void rem_input_file(struct thr_info *tip) | |
922 | { | |
923 | list_del(&tip->head); | |
924 | ||
925 | tip_release(tip); | |
926 | ||
927 | close(tip->ofd); | |
928 | close(tip->ifd); | |
929 | free(tip->file_name); | |
930 | free(tip->devnm); | |
931 | free(tip); | |
932 | } | |
933 | ||
934 | /** | |
935 | * rem_input_files - Remove all input files | |
936 | */ | |
937 | static void rem_input_files(void) | |
938 | { | |
939 | struct list_head *p, *q; | |
940 | ||
941 | list_for_each_safe(p, q, &input_files) { | |
942 | rem_input_file(list_entry(p, struct thr_info, head)); | |
943 | } | |
944 | } | |
945 | ||
946 | /** | |
947 | * __find_input_files - Find input files associated with this device (per cpu) | |
948 | */ | |
949 | static void __find_input_files(struct dev_info *dip) | |
950 | { | |
951 | int cpu = 0; | |
952 | ||
953 | for (;;) { | |
954 | char full_name[MAXPATHLEN]; | |
955 | ||
956 | sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu); | |
957 | if (access(full_name, R_OK) != 0) | |
958 | break; | |
959 | ||
960 | add_input_file(cpu, dip->devnm, full_name); | |
961 | cpu++; | |
962 | } | |
963 | ||
964 | if (!cpu) { | |
965 | fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm); | |
966 | /*NOTREACHED*/ | |
967 | } | |
968 | ||
969 | rem_input_dev(dip); | |
970 | } | |
971 | ||
972 | ||
973 | /** | |
974 | * find_input_files - Find input files for all devices | |
975 | */ | |
976 | static void find_input_files(void) | |
977 | { | |
978 | struct list_head *p, *q; | |
979 | ||
980 | list_for_each_safe(p, q, &input_devs) { | |
981 | __find_input_files(list_entry(p, struct dev_info, head)); | |
982 | } | |
983 | } | |
984 | ||
985 | /* | |
986 | * ======================================================================== | |
987 | * ==== RECLAIM ROUTINES ================================================== | |
988 | * ======================================================================== | |
989 | */ | |
990 | ||
991 | /** | |
992 | * reap_wait_aios - Wait for and return number of outstanding AIOs | |
993 | * | |
994 | * Will return 0 if we are done | |
995 | */ | |
996 | static int reap_wait_aios(struct thr_info *tip) | |
997 | { | |
998 | int naios = 0; | |
999 | ||
1000 | if (!is_reap_done(tip)) { | |
1001 | pthread_mutex_lock(&tip->mutex); | |
1002 | while (tip->naios_out == 0) { | |
1003 | tip->reap_wait = 1; | |
1004 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1005 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1006 | "nfree_current cond wait failed\n"); | |
1007 | /*NOTREACHED*/ | |
1008 | } | |
1009 | } | |
1010 | naios = tip->naios_out; | |
1011 | pthread_mutex_unlock(&tip->mutex); | |
1012 | } | |
1013 | assert(is_reap_done(tip) || naios > 0); | |
1014 | ||
1015 | return is_reap_done(tip) ? 0 : naios; | |
1016 | } | |
1017 | ||
1018 | /** | |
1019 | * reclaim_ios - Reclaim AIOs completed, recycle IOCBs | |
1020 | * @tip: Per-thread information | |
1021 | * @naios_out: Number of AIOs we have outstanding (min) | |
1022 | */ | |
1023 | static void reclaim_ios(struct thr_info *tip, long naios_out) | |
1024 | { | |
1025 | long i, ndone; | |
1026 | struct io_event *evp, events[naios_out]; | |
1027 | ||
1028 | again: | |
1029 | assert(naios > 0); | |
1030 | for (;;) { | |
1031 | ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL); | |
1032 | if (ndone > 0) | |
1033 | break; | |
1034 | ||
1035 | if (errno && errno != EINTR) { | |
1036 | fatal("io_getevents", ERR_SYSCALL, | |
1037 | "io_getevents failed\n"); | |
1038 | /*NOTREACHED*/ | |
1039 | } | |
1040 | } | |
1041 | assert(0 < ndone && ndone <= naios_out); | |
1042 | ||
1043 | pthread_mutex_lock(&tip->mutex); | |
1044 | for (i = 0, evp = events; i < ndone; i++, evp++) { | |
1045 | struct iocb_pkt *iocbp = evp->data; | |
1046 | ||
1047 | if (evp->res != iocbp->iocb.u.c.nbytes) { | |
1048 | fatal(NULL, ERR_SYSCALL, | |
1049 | "Event failure %ld/%ld\t(%ld + %ld)\n", | |
1050 | (long)evp->res, (long)evp->res2, | |
1051 | (long)iocbp->iocb.u.c.offset / nb_sec, | |
1052 | (long)iocbp->iocb.u.c.nbytes / nb_sec); | |
1053 | /*NOTREACHED*/ | |
1054 | } | |
1055 | ||
1056 | list_move_tail(&iocbp->head, &tip->free_iocbs); | |
1057 | } | |
1058 | ||
1059 | tip->naios_free += ndone; | |
1060 | tip->naios_out -= ndone; | |
1061 | naios_out = minl(naios_out, tip->naios_out); | |
1062 | ||
1063 | if (tip->send_wait) { | |
1064 | tip->send_wait = 0; | |
1065 | pthread_cond_signal(&tip->cond); | |
1066 | } | |
1067 | pthread_mutex_unlock(&tip->mutex); | |
1068 | ||
1069 | /* | |
1070 | * Short cut: If we /know/ there are some more AIOs, go handle them | |
1071 | */ | |
1072 | if (naios_out) | |
1073 | goto again; | |
1074 | } | |
1075 | ||
1076 | /** | |
1077 | * replay_rec - Worker thread to reclaim AIOs | |
1078 | * @arg: Pointer to thread information | |
1079 | */ | |
1080 | static void *replay_rec(void *arg) | |
1081 | { | |
1082 | long naios_out; | |
1083 | struct thr_info *tip = arg; | |
1084 | ||
1085 | while ((naios_out = reap_wait_aios(tip)) > 0) | |
1086 | reclaim_ios(tip, naios_out); | |
1087 | ||
1088 | assert(tip->send_done); | |
1089 | tip->reap_done = 1; | |
1090 | set_reclaim_done(); | |
1091 | ||
1092 | return NULL; | |
1093 | } | |
1094 | ||
1095 | /* | |
1096 | * ======================================================================== | |
1097 | * ==== REPLAY ROUTINES =================================================== | |
1098 | * ======================================================================== | |
1099 | */ | |
1100 | ||
1101 | /** | |
1102 | * next_bunch - Retrieve next bunch of AIOs to process | |
1103 | * @tip: Per-thread information | |
1104 | * @bunch: Bunch information | |
1105 | * | |
1106 | * Returns TRUE if we recovered a bunch of IOs, else hit EOF | |
1107 | */ | |
1108 | static int next_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1109 | { | |
1110 | size_t count, result; | |
1111 | ||
1112 | result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr)); | |
1113 | if (result != sizeof(bunch->hdr)) { | |
1114 | if (result == 0) | |
1115 | return 0; | |
1116 | ||
1117 | fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n", | |
1118 | (long)result); | |
1119 | /*NOTREACHED*/ | |
1120 | } | |
1121 | assert(bunch->hdr.npkts <= BT_MAX_PKTS); | |
1122 | ||
1123 | count = bunch->hdr.npkts * sizeof(struct io_pkt); | |
1124 | result = read(tip->ifd, &bunch->pkts, count); | |
1125 | if (result != count) { | |
1126 | fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n", | |
1127 | (long)result, (long)count); | |
1128 | /*NOTREACHED*/ | |
1129 | } | |
1130 | ||
1131 | return 1; | |
1132 | } | |
1133 | ||
1134 | /** | |
1135 | * nfree_current - Returns current number of AIOs that are free | |
1136 | * | |
1137 | * Will wait for available ones... | |
1138 | * | |
1139 | * Returns 0 if we have some condition that causes us to exit | |
1140 | */ | |
1141 | static int nfree_current(struct thr_info *tip) | |
1142 | { | |
1143 | int nfree = 0; | |
1144 | ||
1145 | pthread_mutex_lock(&tip->mutex); | |
1146 | while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) { | |
1147 | tip->send_wait = 1; | |
1148 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1149 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1150 | "nfree_current cond wait failed\n"); | |
1151 | /*NOTREACHED*/ | |
1152 | } | |
1153 | } | |
1154 | pthread_mutex_unlock(&tip->mutex); | |
1155 | ||
1156 | return nfree; | |
1157 | } | |
1158 | ||
1159 | /** | |
1160 | * stall - Stall for the number of nanoseconds requested | |
1161 | * | |
1162 | * We may be late, in which case we just return. | |
1163 | */ | |
1164 | static void stall(struct thr_info *tip, long long oclock) | |
1165 | { | |
1166 | struct timespec req; | |
1167 | long long dreal, tclock = gettime() - rgenesis; | |
1168 | ||
1169 | if (verbose > 1) | |
1170 | fprintf(tip->vfp, " stall(%lld.%09lld, %lld.%09lld)\n", | |
1171 | du64_to_sec(oclock), du64_to_nsec(oclock), | |
1172 | du64_to_sec(tclock), du64_to_nsec(tclock)); | |
1173 | ||
1174 | while (!is_send_done(tip) && tclock < oclock) { | |
1175 | dreal = oclock - tclock; | |
1176 | req.tv_sec = dreal / (1000 * 1000 * 1000); | |
1177 | req.tv_nsec = dreal % (1000 * 1000 * 1000); | |
1178 | ||
1179 | if (verbose > 1) { | |
1180 | fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n", | |
1181 | (long long)req.tv_sec, | |
1182 | (long long)req.tv_nsec); | |
1183 | } | |
1184 | ||
1185 | if (nanosleep(&req, NULL) < 0 && signal_done) | |
1186 | break; | |
1187 | ||
1188 | tclock = gettime() - rgenesis; | |
1189 | } | |
1190 | } | |
1191 | ||
1192 | /** | |
1193 | * iocbs_map - Map a set of AIOs onto a set of IOCBs | |
1194 | * @tip: Per-thread information | |
1195 | * @list: List of AIOs created | |
1196 | * @pkts: AIOs to map | |
1197 | * @ntodo: Number of AIOs to map | |
1198 | */ | |
1199 | static void iocbs_map(struct thr_info *tip, struct iocb **list, | |
1200 | struct io_pkt *pkts, int ntodo) | |
1201 | { | |
1202 | int i; | |
1203 | struct io_pkt *pkt; | |
1204 | ||
1205 | assert(0 < ntodo && ntodo <= naios); | |
1206 | ||
1207 | pthread_mutex_lock(&tip->mutex); | |
1208 | assert(ntodo <= list_len(&tip->free_iocbs)); | |
1209 | for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) { | |
1210 | __u32 rw = pkt->rw; | |
1211 | struct iocb_pkt *iocbp; | |
1212 | ||
1213 | if (!pkt->rw && !write_enabled) | |
1214 | rw = 1; | |
1215 | ||
1216 | if (verbose > 1) | |
1217 | fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n", | |
1218 | (unsigned long long)pkt->sector, | |
1219 | (unsigned long long)pkt->nbytes / nb_sec, | |
1220 | rw ? 'R' : 'W', | |
1221 | (rw == 1 && pkt->rw == 0) ? '!' : ' '); | |
1222 | ||
1223 | iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head); | |
1224 | iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec); | |
1225 | ||
1226 | list_move_tail(&iocbp->head, &tip->used_iocbs); | |
1227 | list[i] = &iocbp->iocb; | |
1228 | } | |
1229 | ||
1230 | tip->naios_free -= ntodo; | |
1231 | assert(tip->naios_free >= 0); | |
1232 | pthread_mutex_unlock(&tip->mutex); | |
1233 | } | |
1234 | ||
1235 | /** | |
1236 | * process_bunch - Process a bunch of requests | |
1237 | * @tip: Per-thread information | |
1238 | * @bunch: Bunch to process | |
1239 | */ | |
1240 | static void process_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1241 | { | |
1242 | __u64 i = 0; | |
1243 | struct iocb *list[bunch->hdr.npkts]; | |
1244 | ||
1245 | assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS); | |
1246 | while (!is_send_done(tip) && (i < bunch->hdr.npkts)) { | |
1247 | long ndone; | |
1248 | int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i); | |
1249 | ||
1250 | assert(0 < ntodo && ntodo <= naios); | |
1251 | iocbs_map(tip, list, &bunch->pkts[i], ntodo); | |
1252 | if (!no_stalls) | |
1253 | stall(tip, bunch->hdr.time_stamp - genesis); | |
1254 | ||
1255 | if (ntodo) { | |
1256 | if (verbose > 1) | |
1257 | fprintf(tip->vfp, "submit(%d)\n", ntodo); | |
1258 | ndone = io_submit(tip->ctx, ntodo, list); | |
1259 | if (ndone != (long)ntodo) { | |
1260 | fatal("io_submit", ERR_SYSCALL, | |
1261 | "%d: io_submit(%d:%ld) failed (%s)\n", | |
1262 | tip->cpu, ntodo, ndone, | |
1263 | strerror(labs(ndone))); | |
1264 | /*NOTREACHED*/ | |
1265 | } | |
1266 | ||
1267 | pthread_mutex_lock(&tip->mutex); | |
1268 | tip->naios_out += ndone; | |
1269 | assert(tip->naios_out <= naios); | |
1270 | if (tip->reap_wait) { | |
1271 | tip->reap_wait = 0; | |
1272 | pthread_cond_signal(&tip->cond); | |
1273 | } | |
1274 | pthread_mutex_unlock(&tip->mutex); | |
1275 | ||
1276 | i += ndone; | |
1277 | assert(i <= bunch->hdr.npkts); | |
1278 | } | |
1279 | } | |
1280 | } | |
1281 | ||
1282 | /** | |
1283 | * reset_input_file - Reset the input file for the next iteration | |
1284 | * @tip: Thread information | |
1285 | * | |
1286 | * We also do a dummy read of the file header to get us to the first bunch. | |
1287 | */ | |
1288 | static void reset_input_file(struct thr_info *tip) | |
1289 | { | |
1290 | struct io_file_hdr hdr; | |
1291 | ||
1292 | lseek(tip->ifd, 0, 0); | |
1293 | ||
1294 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
1295 | fatal(tip->file_name, ERR_ARGS, "Header reread failed\n"); | |
1296 | /*NOTREACHED*/ | |
1297 | } | |
1298 | } | |
1299 | ||
1300 | /** | |
1301 | * replay_sub - Worker thread to submit AIOs that are being replayed | |
1302 | */ | |
1303 | static void *replay_sub(void *arg) | |
1304 | { | |
1305 | char path[MAXPATHLEN]; | |
1306 | struct io_bunch bunch; | |
1307 | struct thr_info *tip = arg; | |
1308 | ||
1309 | pin_to_cpu(tip); | |
1310 | ||
1311 | sprintf(path, "/dev/%s", map_dev(tip->devnm)); | |
1312 | tip->ofd = open(path, O_RDWR | O_DIRECT); | |
1313 | if (tip->ofd < 0) { | |
1314 | fatal(path, ERR_SYSCALL, "Failed device open\n"); | |
1315 | /*NOTREACHED*/ | |
1316 | } | |
1317 | ||
1318 | set_replay_ready(); | |
1319 | while (!is_send_done(tip) && tip->iterations--) { | |
1320 | wait_iter_start(); | |
1321 | if (verbose) | |
1322 | fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations); | |
1323 | while (!is_send_done(tip) && next_bunch(tip, &bunch)) | |
1324 | process_bunch(tip, &bunch); | |
1325 | set_iter_done(); | |
1326 | reset_input_file(tip); | |
1327 | } | |
1328 | tip->send_done = 1; | |
1329 | set_replay_done(); | |
1330 | ||
1331 | return NULL; | |
1332 | } | |
1333 | ||
1334 | /* | |
1335 | * ======================================================================== | |
1336 | * ==== COMMAND LINE ARGUMENT HANDLING ==================================== | |
1337 | * ======================================================================== | |
1338 | */ | |
1339 | ||
/*
 * Option summary text. Adjacent string literals are concatenated by the
 * compiler, so no line continuations are needed.
 */
static char usage_str[] =
	"\n"
	"\t[ -c <cpus> : --cpus=<cpus> ] Default: 1\n"
	"\t[ -d <dir> : --input-directory=<dir> ] Default: .\n"
	"\t[ -F : --find-records ] Default: Off\n"
	"\t[ -h : --help ] Default: Off\n"
	"\t[ -i <base> : --input-base=<base> ] Default: replay\n"
	"\t[ -I <iters>: --iterations=<iters> ] Default: 1\n"
	"\t[ -M <file> : --map-devs=<file> ] Default: None\n"
	"\t[ -N : --no-stalls ] Default: Off\n"
	"\t[ -v : --verbose ] Default: Off\n"
	"\t[ -V : --version ] Default: Off\n"
	"\t[ -W : --write-enable ] Default: Off\n"
	"\t<dev...> Default: None\n"
	"\n";
1355 | ||
/*
 * Short option string handed to getopt_long().
 * NOTE(review): "t:" is accepted here but handle_args() has no 't' case,
 * so -t ends up in the switch's default branch - confirm whether it is
 * a leftover.
 */
#define S_OPTS "c:d:Fhi:I:M:Nt:vVW"

/* Long options; fields are { name, has_arg, flag, val }. */
static struct option l_opts[] = {
	{ "cpus",            required_argument, NULL, 'c' },
	{ "input-directory", required_argument, NULL, 'd' },
	{ "find-records",    no_argument,       NULL, 'F' },
	{ "help",            no_argument,       NULL, 'h' },
	{ "input-base",      required_argument, NULL, 'i' },
	{ "iterations",      required_argument, NULL, 'I' },
	{ "map-devs",        required_argument, NULL, 'M' },
	{ "no-stalls",       no_argument,       NULL, 'N' },
	{ "verbose",         no_argument,       NULL, 'v' },
	{ "version",         no_argument,       NULL, 'V' },
	{ "write-enable",    no_argument,       NULL, 'W' },
	{ NULL,              0,                 NULL, 0   }	/* terminator */
};
1428 | ||
1429 | /** | |
1430 | * handle_args: Parse passed in argument list | |
1431 | * @argc: Number of arguments in argv | |
1432 | * @argv: Arguments passed in | |
1433 | * | |
1434 | * Does rudimentary parameter verification as well. | |
1435 | */ | |
1436 | static void handle_args(int argc, char *argv[]) | |
1437 | { | |
1438 | int c; | |
1439 | ||
1440 | while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) { | |
1441 | switch (c) { | |
1442 | case 'c': | |
1443 | cpus_to_use = atoi(optarg); | |
1444 | if (cpus_to_use <= 0 || cpus_to_use > ncpus) { | |
1445 | fatal(NULL, ERR_ARGS, | |
1446 | "Invalid number of cpus %d (0<x<%d)\n", | |
1447 | cpus_to_use, ncpus); | |
1448 | /*NOTREACHED*/ | |
1449 | } | |
1450 | break; | |
1451 | ||
1452 | case 'd': | |
1453 | idir = optarg; | |
1454 | if (access(idir, R_OK | X_OK) != 0) { | |
1455 | fatal(idir, ERR_ARGS, | |
1456 | "Invalid input directory specified\n"); | |
1457 | /*NOTREACHED*/ | |
1458 | } | |
1459 | break; | |
1460 | ||
1461 | case 'F': | |
1462 | find_records = 1; | |
1463 | break; | |
1464 | ||
1465 | case 'h': | |
1466 | usage(); | |
1467 | exit(0); | |
1468 | /*NOTREACHED*/ | |
1469 | ||
1470 | case 'i': | |
1471 | ibase = optarg; | |
1472 | break; | |
1473 | ||
1474 | case 'I': | |
1475 | def_iterations = atoi(optarg); | |
1476 | if (def_iterations <= 0) { | |
1477 | fprintf(stderr, | |
1478 | "Invalid number of iterations %d\n", | |
1479 | def_iterations); | |
1480 | exit(ERR_ARGS); | |
1481 | /*NOTREACHED*/ | |
1482 | } | |
1483 | break; | |
1484 | ||
1485 | case 'M': | |
1486 | read_map_devs(optarg); | |
1487 | break; | |
1488 | ||
1489 | case 'N': | |
1490 | no_stalls = 1; | |
1491 | break; | |
1492 | ||
1493 | case 'V': | |
1494 | fprintf(stderr, "btreplay -- version %s\n", | |
1495 | my_btversion); | |
1496 | fprintf(stderr, " Built on %s\n", | |
1497 | build_date); | |
1498 | exit(0); | |
1499 | /*NOTREACHED*/ | |
1500 | ||
1501 | case 'v': | |
1502 | verbose++; | |
1503 | break; | |
1504 | ||
1505 | case 'W': | |
1506 | write_enabled = 1; | |
1507 | break; | |
1508 | ||
1509 | default: | |
1510 | usage(); | |
1511 | fatal(NULL, ERR_ARGS, | |
1512 | "Invalid command line argument %c\n", c); | |
1513 | /*NOTREACHED*/ | |
1514 | } | |
1515 | } | |
1516 | ||
1517 | while (optind < argc) | |
1518 | add_input_dev(argv[optind++]); | |
1519 | ||
1520 | if (find_records) | |
1521 | find_input_devs(idir); | |
1522 | ||
1523 | if (list_len(&input_devs) == 0) { | |
1524 | fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n"); | |
1525 | /*NOTREACHED*/ | |
1526 | } | |
1527 | ||
1528 | if (cpus_to_use < 0) | |
1529 | cpus_to_use = ncpus; | |
1530 | } | |
1531 | ||
1532 | /* | |
1533 | * ======================================================================== | |
1534 | * ==== MAIN ROUTINE ====================================================== | |
1535 | * ======================================================================== | |
1536 | */ | |
1537 | ||
1538 | /** | |
1539 | * set_signal_done - Signal handler, catches signals & sets signal_done | |
1540 | */ | |
1541 | static void set_signal_done(__attribute__((__unused__))int signum) | |
1542 | { | |
1543 | signal_done = 1; | |
1544 | } | |
1545 | ||
1546 | /** | |
1547 | * main - | |
1548 | * @argc: Number of arguments | |
1549 | * @argv: Array of arguments | |
1550 | */ | |
1551 | int main(int argc, char *argv[]) | |
1552 | { | |
1553 | int i; | |
1554 | struct list_head *p; | |
1555 | ||
1556 | pgsize = getpagesize(); | |
1557 | assert(pgsize > 0); | |
1558 | ||
1559 | setup_signal(SIGINT, set_signal_done); | |
1560 | setup_signal(SIGTERM, set_signal_done); | |
1561 | ||
1562 | get_ncpus(); | |
1563 | handle_args(argc, argv); | |
1564 | find_input_files(); | |
1565 | ||
1566 | nfiles = list_len(&input_files); | |
1567 | __list_for_each(p, &input_files) { | |
1568 | tip_init(list_entry(p, struct thr_info, head)); | |
1569 | } | |
1570 | ||
1571 | wait_replays_ready(); | |
1572 | for (i = 0; i < def_iterations; i++) { | |
1573 | rgenesis = gettime(); | |
1574 | start_iter(); | |
1575 | if (verbose) | |
1576 | fprintf(stderr, "I"); | |
1577 | wait_iters_done(); | |
1578 | } | |
1579 | ||
1580 | wait_replays_done(); | |
1581 | wait_reclaims_done(); | |
1582 | ||
1583 | if (verbose) | |
1584 | fprintf(stderr, "\n"); | |
1585 | ||
1586 | rem_input_files(); | |
1587 | release_map_devs(); | |
1588 | ||
1589 | return 0; | |
1590 | } |