Commit | Line | Data |
---|---|---|
d47a3fec AB |
1 | /* |
2 | * Blktrace replay utility - Play traces back | |
3 | * | |
4 | * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 | */ | |
20 | ||
21 | static char build_date[] = __DATE__ " at "__TIME__; | |
22 | ||
23 | #include <assert.h> | |
24 | #include <errno.h> | |
25 | #include <fcntl.h> | |
26 | #include <libaio.h> | |
27 | #include <pthread.h> | |
28 | #include <sched.h> | |
29 | #include <signal.h> | |
30 | #include <stdio.h> | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | #include <time.h> | |
34 | #include <unistd.h> | |
35 | #include <sys/param.h> | |
36 | #include <sys/stat.h> | |
37 | #include <sys/time.h> | |
38 | #include <sys/types.h> | |
39 | #include <dirent.h> | |
65a7043b | 40 | #include <stdarg.h> |
d47a3fec AB |
41 | |
42 | #if !defined(_GNU_SOURCE) | |
43 | # define _GNU_SOURCE | |
44 | #endif | |
45 | #include <getopt.h> | |
46 | ||
47 | #include "list.h" | |
48 | #include "btrecord.h" | |
49 | ||
50 | /* | |
51 | * ======================================================================== | |
52 | * ==== STRUCTURE DEFINITIONS ============================================= | |
53 | * ======================================================================== | |
54 | */ | |
55 | ||
56 | /** | |
57 | * Each device map has one of these: | |
58 | * | |
59 | * @head: Linked on to map_devs | |
60 | * @from_dev: Device name as seen on recorded system | |
61 | * @to_dev: Device name to be used on replay system | |
62 | */ | |
63 | struct map_dev { | |
64 | struct list_head head; | |
65 | char *from_dev, *to_dev; | |
66 | }; | |
67 | ||
68 | /** | |
69 | * Each device name specified has one of these (until threads are created) | |
70 | * | |
71 | * @head: Linked onto input_devs | |
72 | * @devnm: Device name -- 'sd*' | |
73 | */ | |
74 | struct dev_info { | |
75 | struct list_head head; | |
76 | char *devnm; | |
77 | }; | |
78 | ||
79 | /* | |
80 | * Per input file information | |
81 | * | |
82 | * @head: Used to link up on input_files | |
83 | * @free_iocbs: List of free iocb's available for use | |
84 | * @used_iocbs: List of iocb's currently outstanding | |
85 | * @mutex: Mutex used with condition variable to protect volatile values | |
86 | * @cond: Condition variable used when waiting on a volatile value change | |
87 | * @naios_out: Current number of AIOs outstanding on this context | |
88 | * @naios_free: Number of AIOs on the free list (short cut for list_len) | |
89 | * @send_wait: Boolean: When true, the sub thread is waiting on free IOCBs | |
90 | * @reap_wait: Boolean: When true, the rec thread is waiting on used IOCBs | |
91 | * @send_done: Boolean: When true, the sub thread has completed work | |
92 | * @reap_done: Boolean: When true, the rec thread has completed work | |
93 | * @sub_thread: Thread used to submit IOs. | |
94 | * @rec_thread: Thread used to reclaim IOs. | |
95 | * @ctx: IO context | |
96 | * @devnm: Copy of the device name being managed by this thread | |
97 | * @file_name: Full name of the input file | |
98 | * @cpu: CPU this thread is pinned to | |
99 | * @ifd: Input file descriptor | |
100 | * @ofd: Output file descriptor | |
101 | * @iterations: Remaining iterations to process | |
102 | * @vfp: For verbose dumping of actions performed | |
103 | */ | |
104 | struct thr_info { | |
105 | struct list_head head, free_iocbs, used_iocbs; | |
106 | pthread_mutex_t mutex; | |
107 | pthread_cond_t cond; | |
108 | volatile long naios_out, naios_free; | |
109 | volatile int send_wait, reap_wait, send_done, reap_done; | |
110 | pthread_t sub_thread, rec_thread; | |
111 | io_context_t ctx; | |
112 | char *devnm, *file_name; | |
113 | int cpu, ifd, ofd, iterations; | |
114 | FILE *vfp; | |
115 | }; | |
116 | ||
117 | /* | |
118 | * Every Asynchronous IO used has one of these (naios per file/device). | |
119 | * | |
120 | * @iocb: IOCB sent down via io_submit | |
121 | * @head: Linked onto file_list.free_iocbs or file_list.used_iocbs | |
122 | * @tip: Pointer to per-thread information this IO is associated with | |
123 | * @nbytes: Number of bytes in buffer associated with iocb | |
124 | */ | |
125 | struct iocb_pkt { | |
126 | struct iocb iocb; | |
127 | struct list_head head; | |
128 | struct thr_info *tip; | |
129 | int nbytes; | |
130 | }; | |
131 | ||
132 | /* | |
133 | * ======================================================================== | |
134 | * ==== GLOBAL VARIABLES ================================================== | |
135 | * ======================================================================== | |
136 | */ | |
137 | ||
138 | static volatile int signal_done = 0; // Boolean: Signal'ed, need to quit | |
139 | ||
140 | static char *ibase = "replay"; // Input base name | |
141 | static char *idir = "."; // Input directory base | |
142 | static int cpus_to_use = -1; // Number of CPUs to use | |
143 | static int def_iterations = 1; // Default number of iterations | |
144 | static int naios = 512; // Number of AIOs per thread | |
145 | static int ncpus = 0; // Number of CPUs in the system | |
146 | static int verbose = 0; // Boolean: Output some extra info | |
147 | static int write_enabled = 0; // Boolean: Enable writing | |
148 | static __u64 genesis = ~0; // Earliest time seen | |
149 | static __u64 rgenesis; // Our start time | |
150 | static size_t pgsize; // System Page size | |
151 | static int nb_sec = 512; // Number of bytes per sector | |
152 | static LIST_HEAD(input_devs); // List of devices to handle | |
153 | static LIST_HEAD(input_files); // List of input files to handle | |
154 | static LIST_HEAD(map_devs); // List of device maps | |
155 | static int nfiles = 0; // Number of files to handle | |
156 | static int no_stalls = 0; // Boolean: Disable pre-stalls | |
4a7968cc | 157 | static unsigned acc_factor = 1; // Int: Acceleration factor |
d47a3fec AB |
158 | static int find_records = 0; // Boolean: Find record files auto |
159 | ||
160 | /* | |
161 | * Variables managed under control of condition variables. | |
162 | * | |
163 | * n_reclaims_done: Counts number of reclaim threads that have completed. | |
164 | * n_replays_done: Counts number of replay threads that have completed. | |
165 | * n_replays_ready: Counts number of replay threads ready to start. | |
166 | * n_iters_done: Counts number of replay threads done one iteration. | |
167 | * iter_start: Starts an iteration for the replay threads. | |
168 | */ | |
169 | static volatile int n_reclaims_done = 0; | |
170 | static pthread_mutex_t reclaim_done_mutex = PTHREAD_MUTEX_INITIALIZER; | |
171 | static pthread_cond_t reclaim_done_cond = PTHREAD_COND_INITIALIZER; | |
172 | ||
173 | static volatile int n_replays_done = 0; | |
174 | static pthread_mutex_t replay_done_mutex = PTHREAD_MUTEX_INITIALIZER; | |
175 | static pthread_cond_t replay_done_cond = PTHREAD_COND_INITIALIZER; | |
176 | ||
177 | static volatile int n_replays_ready = 0; | |
178 | static pthread_mutex_t replay_ready_mutex = PTHREAD_MUTEX_INITIALIZER; | |
179 | static pthread_cond_t replay_ready_cond = PTHREAD_COND_INITIALIZER; | |
180 | ||
181 | static volatile int n_iters_done = 0; | |
182 | static pthread_mutex_t iter_done_mutex = PTHREAD_MUTEX_INITIALIZER; | |
183 | static pthread_cond_t iter_done_cond = PTHREAD_COND_INITIALIZER; | |
184 | ||
185 | static volatile int iter_start = 0; | |
186 | static pthread_mutex_t iter_start_mutex = PTHREAD_MUTEX_INITIALIZER; | |
187 | static pthread_cond_t iter_start_cond = PTHREAD_COND_INITIALIZER; | |
188 | ||
189 | /* | |
190 | * ======================================================================== | |
191 | * ==== FORWARD REFERENCES ================================================ | |
192 | * ======================================================================== | |
193 | */ | |
194 | ||
195 | static void *replay_sub(void *arg); | |
196 | static void *replay_rec(void *arg); | |
197 | static char usage_str[]; | |
198 | ||
199 | /* | |
200 | * ======================================================================== | |
201 | * ==== INLINE ROUTINES =================================================== | |
202 | * ======================================================================== | |
203 | */ | |
204 | ||
205 | /* | |
206 | * The 'fatal' macro will output a perror message (if errstring is !NULL) | |
207 | * and display a string (with variable arguments) and then exit with the | |
208 | * specified exit value. | |
209 | */ | |
210 | #define ERR_ARGS 1 | |
211 | #define ERR_SYSCALL 2 | |
65a7043b AB |
/*
 * fatal - Report an error and terminate the process.
 * @errstring: When non-NULL, passed to perror() before the message
 * @exitval:   Process exit status
 * @fmt:       printf-style format for the message written to stderr
 */
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
d47a3fec AB |
227 | |
/* du64_to_sec - Whole seconds contained in a nanosecond count. */
static inline long long unsigned du64_to_sec(__u64 du64)
{
	long long unsigned nsecs = (long long unsigned)du64;

	return nsecs / (1000 * 1000 * 1000);
}
232 | ||
/*
 * du64_to_nsec - Sub-second nanoseconds of a nanosecond count.
 *
 * The value is reinterpreted as signed and its magnitude is used.
 */
static inline long long unsigned du64_to_nsec(__u64 du64)
{
	long long magnitude = llabs((long long)du64);

	return magnitude % (1000 * 1000 * 1000);
}
237 | ||
238 | /** | |
239 | * min - Return minimum of two integers | |
240 | */ | |
static inline int min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
245 | ||
246 | /** | |
247 | * minl - Return minimum of two longs | |
248 | */ | |
static inline long minl(long a, long b)
{
	if (a < b)
		return a;
	return b;
}
253 | ||
254 | /** | |
255 | * usage - Display usage string and version | |
256 | */ | |
257 | static inline void usage(void) | |
258 | { | |
259 | fprintf(stderr, "Usage: btreplay -- version %s\n%s", | |
260 | my_btversion, usage_str); | |
261 | } | |
262 | ||
263 | /** | |
264 | * is_send_done - Returns true if sender should quit early | |
265 | * @tip: Per-thread information | |
266 | */ | |
267 | static inline int is_send_done(struct thr_info *tip) | |
268 | { | |
269 | return signal_done || tip->send_done; | |
270 | } | |
271 | ||
272 | /** | |
273 | * is_reap_done - Returns true if reaper should quit early | |
274 | * @tip: Per-thread information | |
275 | */ | |
276 | static inline int is_reap_done(struct thr_info *tip) | |
277 | { | |
278 | return tip->send_done && tip->naios_out == 0; | |
279 | } | |
280 | ||
/**
 * ts2ns - Convert timespec values to a nanosecond value
 * @ts: Time expressed as seconds + nanoseconds
 */
#define NS_TICKS	((__u64)1000 * (__u64)1000 * (__u64)1000)
static inline __u64 ts2ns(struct timespec *ts)
{
	return ((__u64)(ts->tv_sec) * NS_TICKS) + (__u64)(ts->tv_nsec);
}

/**
 * tv2ns - Convert timeval values to a nanosecond value
 * @tp: Time expressed as seconds + microseconds
 *
 * Seconds are scaled by NS_TICKS and microseconds by 1000 so that the
 * result is in the same unit as ts2ns().
 */
static inline __u64 tv2ns(struct timeval *tp)
{
	return ((__u64)(tp->tv_sec) * NS_TICKS) +
	       ((__u64)(tp->tv_usec) * (__u64)1000);
}
297 | ||
298 | /** | |
299 | * touch_memory - Force physical memory to be allocated | |
300 | * | |
301 | * For malloc()ed memory we need to /touch/ it to make it really | |
302 | * exist. Otherwise, for write's (to storage) things may not work | |
303 | * as planned - we see Linux just use a single area to /read/ from | |
304 | * (as there isn't any memory that has been associated with the | |
305 | * allocated virtual addresses yet). | |
306 | */ | |
307 | static inline void touch_memory(char *buf, size_t bsize) | |
308 | { | |
309 | #if defined(PREP_BUFS) | |
310 | memset(buf, 0, bsize); | |
311 | #else | |
312 | size_t i; | |
313 | ||
314 | for (i = 0; i < bsize; i += pgsize) | |
315 | buf[i] = 0; | |
316 | #endif | |
317 | } | |
318 | ||
319 | /** | |
320 | * buf_alloc - Returns a page-aligned buffer of the specified size | |
321 | * @nbytes: Number of bytes to allocate | |
322 | */ | |
323 | static inline void *buf_alloc(size_t nbytes) | |
324 | { | |
325 | void *buf; | |
326 | ||
327 | if (posix_memalign(&buf, pgsize, nbytes)) { | |
328 | fatal("posix_memalign", ERR_SYSCALL, "Allocation failed\n"); | |
329 | /*NOTREACHED*/ | |
330 | } | |
331 | ||
332 | return buf; | |
333 | } | |
334 | ||
335 | /** | |
336 | * gettime - Returns current time | |
337 | */ | |
338 | static inline __u64 gettime(void) | |
339 | { | |
340 | static int use_clock_gettime = -1; // Which clock to use | |
341 | ||
342 | if (use_clock_gettime < 0) { | |
343 | use_clock_gettime = clock_getres(CLOCK_MONOTONIC, NULL) == 0; | |
344 | if (use_clock_gettime) { | |
345 | struct timespec ts = { | |
346 | .tv_sec = 0, | |
347 | .tv_nsec = 0 | |
348 | }; | |
349 | clock_settime(CLOCK_MONOTONIC, &ts); | |
350 | } | |
351 | } | |
352 | ||
353 | if (use_clock_gettime) { | |
354 | struct timespec ts; | |
355 | clock_gettime(CLOCK_MONOTONIC, &ts); | |
356 | return ts2ns(&ts); | |
357 | } | |
358 | else { | |
359 | struct timeval tp; | |
360 | gettimeofday(&tp, NULL); | |
361 | return tv2ns(&tp); | |
362 | } | |
363 | } | |
364 | ||
365 | /** | |
366 | * setup_signal - Set up a signal handler for the specified signum | |
367 | */ | |
368 | static inline void setup_signal(int signum, sighandler_t handler) | |
369 | { | |
370 | if (signal(signum, handler) == SIG_ERR) { | |
371 | fatal("signal", ERR_SYSCALL, "Failed to set signal %d\n", | |
372 | signum); | |
373 | /*NOTREACHED*/ | |
374 | } | |
375 | } | |
376 | ||
377 | /* | |
378 | * ======================================================================== | |
379 | * ==== CONDITION VARIABLE ROUTINES ======================================= | |
380 | * ======================================================================== | |
381 | */ | |
382 | ||
383 | /** | |
384 | * __set_cv - Increments a variable under condition variable control. | |
385 | * @pmp: Pointer to the associated mutex | |
386 | * @pcp: Pointer to the associated condition variable | |
387 | * @vp: Pointer to the variable being incremented | |
388 | * @mxv: Max value for variable (Used only when ASSERTS are on) | |
389 | */ | |
static inline void __set_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			    volatile int *vp,
			    __attribute__((__unused__))int mxv)
{
	pthread_mutex_lock(pmp);

	/* The counter must still be below its ceiling before the bump. */
	assert(*vp < mxv);
	(*vp)++;

	/* Wake one waiter while still holding the mutex. */
	pthread_cond_signal(pcp);

	pthread_mutex_unlock(pmp);
}
400 | ||
401 | /** | |
402 | * __wait_cv - Waits for a variable under cond var control to hit a value | |
403 | * @pmp: Pointer to the associated mutex | |
404 | * @pcp: Pointer to the associated condition variable | |
405 | * @vp: Pointer to the variable being incremented | |
406 | * @mxv: Value to wait for | |
407 | */ | |
static inline void __wait_cv(pthread_mutex_t *pmp, pthread_cond_t *pcp,
			     volatile int *vp, int mxv)
{
	pthread_mutex_lock(pmp);

	/* Sleep until the counter has reached the requested value. */
	for (;;) {
		if (*vp >= mxv)
			break;
		pthread_cond_wait(pcp, pmp);
	}

	/* Reset the counter so the same variable can be waited on again. */
	*vp = 0;

	pthread_mutex_unlock(pmp);
}
417 | ||
418 | static inline void set_reclaim_done(void) | |
419 | { | |
420 | __set_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
421 | nfiles); | |
422 | } | |
423 | ||
424 | static inline void wait_reclaims_done(void) | |
425 | { | |
426 | __wait_cv(&reclaim_done_mutex, &reclaim_done_cond, &n_reclaims_done, | |
427 | nfiles); | |
428 | } | |
429 | ||
430 | static inline void set_replay_ready(void) | |
431 | { | |
432 | __set_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
433 | nfiles); | |
434 | } | |
435 | ||
436 | static inline void wait_replays_ready(void) | |
437 | { | |
438 | __wait_cv(&replay_ready_mutex, &replay_ready_cond, &n_replays_ready, | |
439 | nfiles); | |
440 | } | |
441 | ||
442 | static inline void set_replay_done(void) | |
443 | { | |
444 | __set_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
445 | nfiles); | |
446 | } | |
447 | ||
448 | static inline void wait_replays_done(void) | |
449 | { | |
450 | __wait_cv(&replay_done_mutex, &replay_done_cond, &n_replays_done, | |
451 | nfiles); | |
452 | } | |
453 | ||
454 | static inline void set_iter_done(void) | |
455 | { | |
456 | __set_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
457 | nfiles); | |
458 | } | |
459 | ||
460 | static inline void wait_iters_done(void) | |
461 | { | |
462 | __wait_cv(&iter_done_mutex, &iter_done_cond, &n_iters_done, | |
463 | nfiles); | |
464 | } | |
465 | ||
466 | /** | |
467 | * wait_iter_start - Wait for an iteration to start | |
468 | * | |
469 | * This is /slightly/ different: we are waiting for a value to become | |
470 | * non-zero, and then we decrement it and go on. | |
471 | */ | |
472 | static inline void wait_iter_start(void) | |
473 | { | |
474 | pthread_mutex_lock(&iter_start_mutex); | |
475 | while (iter_start == 0) | |
476 | pthread_cond_wait(&iter_start_cond, &iter_start_mutex); | |
477 | assert(1 <= iter_start && iter_start <= nfiles); | |
478 | iter_start--; | |
479 | pthread_mutex_unlock(&iter_start_mutex); | |
480 | } | |
481 | ||
482 | /** | |
483 | * start_iter - Start an iteration at the replay thread level | |
484 | */ | |
485 | static inline void start_iter(void) | |
486 | { | |
487 | pthread_mutex_lock(&iter_start_mutex); | |
488 | assert(iter_start == 0); | |
489 | iter_start = nfiles; | |
490 | pthread_cond_broadcast(&iter_start_cond); | |
491 | pthread_mutex_unlock(&iter_start_mutex); | |
492 | } | |
493 | ||
494 | /* | |
495 | * ======================================================================== | |
496 | * ==== CPU RELATED ROUTINES ============================================== | |
497 | * ======================================================================== | |
498 | */ | |
499 | ||
500 | /** | |
501 | * get_ncpus - Sets up the global 'ncpus' value | |
502 | */ | |
503 | static void get_ncpus(void) | |
504 | { | |
fb697494 NZ |
505 | #ifdef _SC_NPROCESSORS_CONF |
506 | ncpus = sysconf(_SC_NPROCESSORS_CONF); | |
507 | #else | |
0a915aab NZ |
508 | int nrcpus = 4096; |
509 | cpu_set_t * cpus; | |
510 | ||
511 | realloc: | |
512 | cpus = CPU_ALLOC(nrcpus); | |
513 | size = CPU_ALLOC_SIZE(nrcpus); | |
514 | CPU_ZERO_S(size, cpus); | |
515 | ||
516 | if (sched_getaffinity(getpid(), size, cpus)) { | |
517 | if( errno == EINVAL && nrcpus < (4096<<4) ) { | |
518 | CPU_FREE(cpus); | |
2564a602 | 519 | nrcpus <<= 1; |
0a915aab NZ |
520 | goto realloc; |
521 | } | |
d47a3fec AB |
522 | fatal("sched_getaffinity", ERR_SYSCALL, "Can't get CPU info\n"); |
523 | /*NOTREACHED*/ | |
524 | } | |
525 | ||
0c2df13d NZ |
526 | ncpus = -1; |
527 | for (last_cpu = 0; last_cpu < CPU_SETSIZE && CPU_ISSET(last_cpu, &cpus); last_cpu++) | |
528 | if (CPU_ISSET( last_cpu, &cpus) ) | |
529 | ncpus = last_cpu; | |
530 | ncpus++; | |
0a915aab | 531 | CPU_FREE(cpus); |
fb697494 | 532 | #endif |
d47a3fec AB |
533 | if (ncpus == 0) { |
534 | fatal(NULL, ERR_SYSCALL, "Insufficient number of CPUs\n"); | |
535 | /*NOTREACHED*/ | |
536 | } | |
537 | } | |
538 | ||
539 | /** | |
540 | * pin_to_cpu - Pin this thread to a specific CPU | |
541 | * @tip: Thread information | |
542 | */ | |
543 | static void pin_to_cpu(struct thr_info *tip) | |
544 | { | |
0a915aab NZ |
545 | cpu_set_t *cpus; |
546 | size_t size; | |
547 | ||
548 | cpus = CPU_ALLOC(ncpus); | |
549 | size = CPU_ALLOC_SIZE(ncpus); | |
d47a3fec AB |
550 | |
551 | assert(0 <= tip->cpu && tip->cpu < ncpus); | |
552 | ||
0a915aab NZ |
553 | CPU_ZERO_S(ncpus, cpus); |
554 | CPU_SET_S(tip->cpu, size, cpus); | |
555 | if (sched_setaffinity(getpid(), size, cpus)) { | |
d47a3fec AB |
556 | fatal("sched_setaffinity", ERR_SYSCALL, "Failed to pin CPU\n"); |
557 | /*NOTREACHED*/ | |
558 | } | |
559 | ||
560 | if (verbose > 1) { | |
561 | int i; | |
0a915aab | 562 | cpu_set_t *now = CPU_ALLOC(ncpus); |
d47a3fec | 563 | |
0a915aab | 564 | (void)sched_getaffinity(getpid(), size, now); |
d47a3fec AB |
565 | fprintf(tip->vfp, "Pinned to CPU %02d ", tip->cpu); |
566 | for (i = 0; i < ncpus; i++) | |
0a915aab | 567 | fprintf(tip->vfp, "%1d", CPU_ISSET_S(i, size, now)); |
d47a3fec AB |
568 | fprintf(tip->vfp, "\n"); |
569 | } | |
570 | } | |
571 | ||
572 | /* | |
573 | * ======================================================================== | |
574 | * ==== INPUT DEVICE HANDLERS ============================================= | |
575 | * ======================================================================== | |
576 | */ | |
577 | ||
578 | /** | |
579 | * add_input_dev - Add a device ('sd*') to the list of devices to handle | |
580 | */ | |
581 | static void add_input_dev(char *devnm) | |
582 | { | |
583 | struct list_head *p; | |
584 | struct dev_info *dip; | |
585 | ||
586 | __list_for_each(p, &input_devs) { | |
587 | dip = list_entry(p, struct dev_info, head); | |
588 | if (strcmp(dip->devnm, devnm) == 0) | |
589 | return; | |
590 | } | |
591 | ||
592 | dip = malloc(sizeof(*dip)); | |
593 | dip->devnm = strdup(devnm); | |
594 | list_add_tail(&dip->head, &input_devs); | |
595 | } | |
596 | ||
597 | /** | |
598 | * rem_input_dev - Remove resources associated with this device | |
599 | */ | |
600 | static void rem_input_dev(struct dev_info *dip) | |
601 | { | |
602 | list_del(&dip->head); | |
603 | free(dip->devnm); | |
604 | free(dip); | |
605 | } | |
606 | ||
607 | static void find_input_devs(char *idir) | |
608 | { | |
609 | struct dirent *ent; | |
610 | DIR *dir = opendir(idir); | |
611 | ||
612 | if (dir == NULL) { | |
613 | fatal(idir, ERR_ARGS, "Unable to open %s\n", idir); | |
614 | /*NOTREACHED*/ | |
615 | } | |
616 | ||
617 | while ((ent = readdir(dir)) != NULL) { | |
6ca1e530 | 618 | char *p, *dsf; |
d47a3fec AB |
619 | |
620 | if (strstr(ent->d_name, ".replay.") == NULL) | |
621 | continue; | |
622 | ||
623 | dsf = strdup(ent->d_name); | |
624 | p = index(dsf, '.'); | |
625 | assert(p != NULL); | |
626 | *p = '\0'; | |
627 | add_input_dev(dsf); | |
628 | free(dsf); | |
629 | } | |
630 | ||
631 | closedir(dir); | |
632 | } | |
633 | ||
634 | /* | |
635 | * ======================================================================== | |
636 | * ==== MAP DEVICE INTERFACES ============================================= | |
637 | * ======================================================================== | |
638 | */ | |
639 | ||
640 | /** | |
641 | * read_map_devs - Read in a set of device mapping from the provided file. | |
642 | * @file_name: File containing device maps | |
643 | * | |
644 | * We support the notion of multiple such files being specified on the cmd line | |
645 | */ | |
646 | static void read_map_devs(char *file_name) | |
647 | { | |
648 | FILE *fp; | |
dd093eb1 | 649 | char from_dev[256], to_dev[256]; |
d47a3fec AB |
650 | |
651 | fp = fopen(file_name, "r"); | |
652 | if (!fp) { | |
653 | fatal(file_name, ERR_SYSCALL, "Could not open map devs file\n"); | |
654 | /*NOTREACHED*/ | |
655 | } | |
656 | ||
dd093eb1 | 657 | while (fscanf(fp, "%s %s", from_dev, to_dev) == 2) { |
d47a3fec AB |
658 | struct map_dev *mdp = malloc(sizeof(*mdp)); |
659 | ||
660 | mdp->from_dev = from_dev; | |
661 | mdp->to_dev = to_dev; | |
662 | list_add_tail(&mdp->head, &map_devs); | |
663 | } | |
664 | ||
665 | fclose(fp); | |
666 | } | |
667 | ||
668 | /** | |
669 | * release_map_devs - Release resources associated with device mappings. | |
670 | */ | |
671 | static void release_map_devs(void) | |
672 | { | |
673 | struct list_head *p, *q; | |
674 | ||
675 | list_for_each_safe(p, q, &map_devs) { | |
676 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
677 | ||
678 | list_del(&mdp->head); | |
679 | ||
680 | free(mdp->from_dev); | |
681 | free(mdp->to_dev); | |
682 | free(mdp); | |
683 | } | |
684 | } | |
685 | ||
686 | /** | |
687 | * map_dev - Return the mapped device for that specified | |
688 | * @from_dev: Device name as seen on recorded system | |
689 | * | |
690 | * Note: If there is no such mapping, we return the same name. | |
691 | */ | |
692 | static char *map_dev(char *from_dev) | |
693 | { | |
694 | struct list_head *p; | |
695 | ||
696 | __list_for_each(p, &map_devs) { | |
697 | struct map_dev *mdp = list_entry(p, struct map_dev, head); | |
698 | ||
699 | if (strcmp(from_dev, mdp->from_dev) == 0) | |
700 | return mdp->to_dev; | |
701 | } | |
702 | ||
703 | return from_dev; | |
704 | } | |
705 | ||
706 | /* | |
707 | * ======================================================================== | |
708 | * ==== IOCB MANAGEMENT ROUTINES ========================================== | |
709 | * ======================================================================== | |
710 | */ | |
711 | ||
712 | /** | |
713 | * iocb_init - Initialize the fields of an IOCB | |
714 | * @tip: Per-thread information | |
715 | * iocbp: IOCB pointer to update | |
716 | */ | |
717 | static void iocb_init(struct thr_info *tip, struct iocb_pkt *iocbp) | |
718 | { | |
719 | iocbp->tip = tip; | |
720 | iocbp->nbytes = 0; | |
721 | iocbp->iocb.u.c.buf = NULL; | |
722 | } | |
723 | ||
724 | /** | |
725 | * iocb_setup - Set up an iocb with this AIOs information | |
726 | * @iocbp: IOCB pointer to update | |
727 | * @rw: Direction (0 == write, 1 == read) | |
728 | * @n: Number of bytes to transfer | |
729 | * @off: Offset (in bytes) | |
730 | */ | |
731 | static void iocb_setup(struct iocb_pkt *iocbp, int rw, int n, long long off) | |
732 | { | |
733 | char *buf; | |
734 | struct iocb *iop = &iocbp->iocb; | |
735 | ||
736 | assert(rw == 0 || rw == 1); | |
737 | assert(0 < n && (n % nb_sec) == 0); | |
738 | assert(0 <= off); | |
739 | ||
740 | if (iocbp->nbytes) { | |
741 | if (iocbp->nbytes >= n) { | |
742 | buf = iop->u.c.buf; | |
743 | goto prep; | |
744 | } | |
745 | ||
746 | assert(iop->u.c.buf); | |
747 | free(iop->u.c.buf); | |
748 | } | |
749 | ||
750 | buf = buf_alloc(n); | |
751 | iocbp->nbytes = n; | |
752 | ||
753 | prep: | |
754 | if (rw) | |
755 | io_prep_pread(iop, iocbp->tip->ofd, buf, n, off); | |
756 | else { | |
757 | assert(write_enabled); | |
758 | io_prep_pwrite(iop, iocbp->tip->ofd, buf, n, off); | |
759 | touch_memory(buf, n); | |
760 | } | |
761 | ||
762 | iop->data = iocbp; | |
763 | } | |
764 | ||
765 | /* | |
766 | * ======================================================================== | |
767 | * ==== PER-THREAD SET UP & TEAR DOWN ===================================== | |
768 | * ======================================================================== | |
769 | */ | |
770 | ||
771 | /** | |
772 | * tip_init - Per thread initialization function | |
773 | */ | |
774 | static void tip_init(struct thr_info *tip) | |
775 | { | |
776 | int i; | |
777 | ||
778 | INIT_LIST_HEAD(&tip->free_iocbs); | |
779 | INIT_LIST_HEAD(&tip->used_iocbs); | |
780 | ||
781 | pthread_mutex_init(&tip->mutex, NULL); | |
782 | pthread_cond_init(&tip->cond, NULL); | |
783 | ||
784 | if (io_setup(naios, &tip->ctx)) { | |
785 | fatal("io_setup", ERR_SYSCALL, "io_setup failed\n"); | |
786 | /*NOTREACHED*/ | |
787 | } | |
788 | ||
789 | tip->ofd = -1; | |
790 | tip->naios_out = 0; | |
791 | tip->send_done = tip->reap_done = 0; | |
792 | tip->send_wait = tip->reap_wait = 0; | |
793 | ||
794 | memset(&tip->sub_thread, 0, sizeof(tip->sub_thread)); | |
795 | memset(&tip->rec_thread, 0, sizeof(tip->rec_thread)); | |
796 | ||
797 | for (i = 0; i < naios; i++) { | |
798 | struct iocb_pkt *iocbp = buf_alloc(sizeof(*iocbp)); | |
799 | ||
800 | iocb_init(tip, iocbp); | |
801 | list_add_tail(&iocbp->head, &tip->free_iocbs); | |
802 | } | |
803 | tip->naios_free = naios; | |
804 | ||
805 | if (verbose > 1) { | |
806 | char fn[MAXPATHLEN]; | |
807 | ||
808 | sprintf(fn, "%s/%s.%s.%d.rep", idir, tip->devnm, ibase, | |
809 | tip->cpu); | |
810 | tip->vfp = fopen(fn, "w"); | |
811 | if (!tip->vfp) { | |
812 | fatal(fn, ERR_SYSCALL, "Failed to open report\n"); | |
813 | /*NOTREACHED*/ | |
814 | } | |
815 | ||
816 | setlinebuf(tip->vfp); | |
817 | } | |
818 | ||
819 | if (pthread_create(&tip->sub_thread, NULL, replay_sub, tip)) { | |
820 | fatal("pthread_create", ERR_SYSCALL, | |
821 | "thread create failed\n"); | |
822 | /*NOTREACHED*/ | |
823 | } | |
824 | ||
825 | if (pthread_create(&tip->rec_thread, NULL, replay_rec, tip)) { | |
826 | fatal("pthread_create", ERR_SYSCALL, | |
827 | "thread create failed\n"); | |
828 | /*NOTREACHED*/ | |
829 | } | |
830 | } | |
831 | ||
832 | /** | |
833 | * tip_release - Release resources associated with this thread | |
834 | */ | |
835 | static void tip_release(struct thr_info *tip) | |
836 | { | |
837 | struct list_head *p, *q; | |
838 | ||
839 | assert(tip->send_done); | |
840 | assert(tip->reap_done); | |
841 | assert(list_len(&tip->used_iocbs) == 0); | |
842 | assert(tip->naios_free == naios); | |
843 | ||
844 | if (pthread_join(tip->sub_thread, NULL)) { | |
845 | fatal("pthread_join", ERR_SYSCALL, "pthread sub join failed\n"); | |
846 | /*NOTREACHED*/ | |
847 | } | |
848 | if (pthread_join(tip->rec_thread, NULL)) { | |
849 | fatal("pthread_join", ERR_SYSCALL, "pthread rec join failed\n"); | |
850 | /*NOTREACHED*/ | |
851 | } | |
852 | ||
853 | io_destroy(tip->ctx); | |
854 | ||
855 | list_splice(&tip->used_iocbs, &tip->free_iocbs); | |
856 | list_for_each_safe(p, q, &tip->free_iocbs) { | |
857 | struct iocb_pkt *iocbp = list_entry(p, struct iocb_pkt, head); | |
858 | ||
859 | list_del(&iocbp->head); | |
860 | if (iocbp->nbytes) | |
861 | free(iocbp->iocb.u.c.buf); | |
862 | free(iocbp); | |
863 | } | |
864 | ||
865 | pthread_cond_destroy(&tip->cond); | |
866 | pthread_mutex_destroy(&tip->mutex); | |
867 | } | |
868 | ||
869 | /** | |
870 | * add_input_file - Allocate and initialize per-input file structure | |
871 | * @cpu: CPU for this file | |
872 | * @devnm: Device name for this file | |
873 | * @file_name: Fully qualifed input file name | |
874 | */ | |
875 | static void add_input_file(int cpu, char *devnm, char *file_name) | |
876 | { | |
877 | struct stat buf; | |
878 | struct io_file_hdr hdr; | |
879 | struct thr_info *tip = buf_alloc(sizeof(*tip)); | |
880 | __u64 my_version = mk_btversion(btver_mjr, btver_mnr, btver_sub); | |
881 | ||
882 | assert(0 <= cpu && cpu < ncpus); | |
883 | ||
884 | memset(&hdr, 0, sizeof(hdr)); | |
885 | memset(tip, 0, sizeof(*tip)); | |
886 | tip->cpu = cpu % cpus_to_use; | |
887 | tip->iterations = def_iterations; | |
888 | ||
889 | tip->ifd = open(file_name, O_RDONLY); | |
890 | if (tip->ifd < 0) { | |
891 | fatal(file_name, ERR_ARGS, "Unable to open\n"); | |
892 | /*NOTREACHED*/ | |
893 | } | |
894 | if (fstat(tip->ifd, &buf) < 0) { | |
895 | fatal(file_name, ERR_SYSCALL, "fstat failed\n"); | |
896 | /*NOTREACHED*/ | |
897 | } | |
898 | if (buf.st_size < (off_t)sizeof(hdr)) { | |
899 | if (verbose) | |
900 | fprintf(stderr, "\t%s empty\n", file_name); | |
901 | goto empty_file; | |
902 | } | |
903 | ||
904 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
905 | fatal(file_name, ERR_ARGS, "Header read failed\n"); | |
906 | /*NOTREACHED*/ | |
907 | } | |
908 | ||
909 | if (hdr.version != my_version) { | |
910 | fprintf(stderr, "%llx %llx %llx %llx\n", | |
911 | (long long unsigned)hdr.version, | |
912 | (long long unsigned)hdr.genesis, | |
913 | (long long unsigned)hdr.nbunches, | |
914 | (long long unsigned)hdr.total_pkts); | |
915 | fatal(NULL, ERR_ARGS, | |
916 | "BT version mismatch: %lx versus my %lx\n", | |
917 | (long)hdr.version, (long)my_version); | |
918 | ||
919 | } | |
920 | ||
921 | if (hdr.nbunches == 0) { | |
922 | empty_file: | |
923 | close(tip->ifd); | |
924 | free(tip); | |
925 | return; | |
926 | } | |
927 | ||
928 | if (hdr.genesis < genesis) { | |
929 | if (verbose > 1) | |
930 | fprintf(stderr, "Setting genesis to %llu.%llu\n", | |
931 | du64_to_sec(hdr.genesis), | |
932 | du64_to_nsec(hdr.genesis)); | |
933 | genesis = hdr.genesis; | |
934 | } | |
935 | ||
936 | tip->devnm = strdup(devnm); | |
937 | tip->file_name = strdup(file_name); | |
938 | ||
939 | list_add_tail(&tip->head, &input_files); | |
940 | ||
941 | if (verbose) | |
942 | fprintf(stderr, "Added %s %llu\n", file_name, | |
943 | (long long)hdr.genesis); | |
944 | } | |
945 | ||
946 | /** | |
947 | * rem_input_file - Release resources associated with an input file | |
948 | * @tip: Per-input file information | |
949 | */ | |
950 | static void rem_input_file(struct thr_info *tip) | |
951 | { | |
952 | list_del(&tip->head); | |
953 | ||
954 | tip_release(tip); | |
955 | ||
956 | close(tip->ofd); | |
957 | close(tip->ifd); | |
958 | free(tip->file_name); | |
959 | free(tip->devnm); | |
960 | free(tip); | |
961 | } | |
962 | ||
963 | /** | |
964 | * rem_input_files - Remove all input files | |
965 | */ | |
966 | static void rem_input_files(void) | |
967 | { | |
968 | struct list_head *p, *q; | |
969 | ||
970 | list_for_each_safe(p, q, &input_files) { | |
971 | rem_input_file(list_entry(p, struct thr_info, head)); | |
972 | } | |
973 | } | |
974 | ||
975 | /** | |
976 | * __find_input_files - Find input files associated with this device (per cpu) | |
977 | */ | |
978 | static void __find_input_files(struct dev_info *dip) | |
979 | { | |
980 | int cpu = 0; | |
981 | ||
982 | for (;;) { | |
983 | char full_name[MAXPATHLEN]; | |
984 | ||
985 | sprintf(full_name, "%s/%s.%s.%d", idir, dip->devnm, ibase, cpu); | |
986 | if (access(full_name, R_OK) != 0) | |
987 | break; | |
988 | ||
989 | add_input_file(cpu, dip->devnm, full_name); | |
990 | cpu++; | |
991 | } | |
992 | ||
993 | if (!cpu) { | |
994 | fatal(NULL, ERR_ARGS, "No traces found for %s\n", dip->devnm); | |
995 | /*NOTREACHED*/ | |
996 | } | |
997 | ||
998 | rem_input_dev(dip); | |
999 | } | |
1000 | ||
1001 | ||
1002 | /** | |
1003 | * find_input_files - Find input files for all devices | |
1004 | */ | |
1005 | static void find_input_files(void) | |
1006 | { | |
1007 | struct list_head *p, *q; | |
1008 | ||
1009 | list_for_each_safe(p, q, &input_devs) { | |
1010 | __find_input_files(list_entry(p, struct dev_info, head)); | |
1011 | } | |
1012 | } | |
1013 | ||
1014 | /* | |
1015 | * ======================================================================== | |
1016 | * ==== RECLAIM ROUTINES ================================================== | |
1017 | * ======================================================================== | |
1018 | */ | |
1019 | ||
1020 | /** | |
1021 | * reap_wait_aios - Wait for and return number of outstanding AIOs | |
1022 | * | |
1023 | * Will return 0 if we are done | |
1024 | */ | |
1025 | static int reap_wait_aios(struct thr_info *tip) | |
1026 | { | |
1027 | int naios = 0; | |
1028 | ||
1029 | if (!is_reap_done(tip)) { | |
1030 | pthread_mutex_lock(&tip->mutex); | |
1031 | while (tip->naios_out == 0) { | |
1032 | tip->reap_wait = 1; | |
1033 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1034 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1035 | "nfree_current cond wait failed\n"); | |
1036 | /*NOTREACHED*/ | |
1037 | } | |
1038 | } | |
1039 | naios = tip->naios_out; | |
1040 | pthread_mutex_unlock(&tip->mutex); | |
1041 | } | |
1042 | assert(is_reap_done(tip) || naios > 0); | |
1043 | ||
1044 | return is_reap_done(tip) ? 0 : naios; | |
1045 | } | |
1046 | ||
1047 | /** | |
1048 | * reclaim_ios - Reclaim AIOs completed, recycle IOCBs | |
1049 | * @tip: Per-thread information | |
1050 | * @naios_out: Number of AIOs we have outstanding (min) | |
1051 | */ | |
1052 | static void reclaim_ios(struct thr_info *tip, long naios_out) | |
1053 | { | |
1054 | long i, ndone; | |
1055 | struct io_event *evp, events[naios_out]; | |
1056 | ||
1057 | again: | |
1058 | assert(naios > 0); | |
1059 | for (;;) { | |
1060 | ndone = io_getevents(tip->ctx, 1, naios_out, events, NULL); | |
1061 | if (ndone > 0) | |
1062 | break; | |
1063 | ||
1064 | if (errno && errno != EINTR) { | |
1065 | fatal("io_getevents", ERR_SYSCALL, | |
1066 | "io_getevents failed\n"); | |
1067 | /*NOTREACHED*/ | |
1068 | } | |
1069 | } | |
1070 | assert(0 < ndone && ndone <= naios_out); | |
1071 | ||
1072 | pthread_mutex_lock(&tip->mutex); | |
1073 | for (i = 0, evp = events; i < ndone; i++, evp++) { | |
1074 | struct iocb_pkt *iocbp = evp->data; | |
1075 | ||
1076 | if (evp->res != iocbp->iocb.u.c.nbytes) { | |
1077 | fatal(NULL, ERR_SYSCALL, | |
1078 | "Event failure %ld/%ld\t(%ld + %ld)\n", | |
1079 | (long)evp->res, (long)evp->res2, | |
1080 | (long)iocbp->iocb.u.c.offset / nb_sec, | |
1081 | (long)iocbp->iocb.u.c.nbytes / nb_sec); | |
1082 | /*NOTREACHED*/ | |
1083 | } | |
1084 | ||
1085 | list_move_tail(&iocbp->head, &tip->free_iocbs); | |
1086 | } | |
1087 | ||
1088 | tip->naios_free += ndone; | |
1089 | tip->naios_out -= ndone; | |
1090 | naios_out = minl(naios_out, tip->naios_out); | |
1091 | ||
1092 | if (tip->send_wait) { | |
1093 | tip->send_wait = 0; | |
1094 | pthread_cond_signal(&tip->cond); | |
1095 | } | |
1096 | pthread_mutex_unlock(&tip->mutex); | |
1097 | ||
1098 | /* | |
1099 | * Short cut: If we /know/ there are some more AIOs, go handle them | |
1100 | */ | |
1101 | if (naios_out) | |
1102 | goto again; | |
1103 | } | |
1104 | ||
1105 | /** | |
1106 | * replay_rec - Worker thread to reclaim AIOs | |
1107 | * @arg: Pointer to thread information | |
1108 | */ | |
1109 | static void *replay_rec(void *arg) | |
1110 | { | |
1111 | long naios_out; | |
1112 | struct thr_info *tip = arg; | |
1113 | ||
1114 | while ((naios_out = reap_wait_aios(tip)) > 0) | |
1115 | reclaim_ios(tip, naios_out); | |
1116 | ||
1117 | assert(tip->send_done); | |
1118 | tip->reap_done = 1; | |
1119 | set_reclaim_done(); | |
1120 | ||
1121 | return NULL; | |
1122 | } | |
1123 | ||
1124 | /* | |
1125 | * ======================================================================== | |
1126 | * ==== REPLAY ROUTINES =================================================== | |
1127 | * ======================================================================== | |
1128 | */ | |
1129 | ||
1130 | /** | |
1131 | * next_bunch - Retrieve next bunch of AIOs to process | |
1132 | * @tip: Per-thread information | |
1133 | * @bunch: Bunch information | |
1134 | * | |
1135 | * Returns TRUE if we recovered a bunch of IOs, else hit EOF | |
1136 | */ | |
1137 | static int next_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1138 | { | |
1139 | size_t count, result; | |
1140 | ||
1141 | result = read(tip->ifd, &bunch->hdr, sizeof(bunch->hdr)); | |
1142 | if (result != sizeof(bunch->hdr)) { | |
1143 | if (result == 0) | |
1144 | return 0; | |
1145 | ||
1146 | fatal(tip->file_name, ERR_SYSCALL, "Short hdr(%ld)\n", | |
1147 | (long)result); | |
1148 | /*NOTREACHED*/ | |
1149 | } | |
1150 | assert(bunch->hdr.npkts <= BT_MAX_PKTS); | |
1151 | ||
1152 | count = bunch->hdr.npkts * sizeof(struct io_pkt); | |
1153 | result = read(tip->ifd, &bunch->pkts, count); | |
1154 | if (result != count) { | |
1155 | fatal(tip->file_name, ERR_SYSCALL, "Short pkts(%ld/%ld)\n", | |
1156 | (long)result, (long)count); | |
1157 | /*NOTREACHED*/ | |
1158 | } | |
1159 | ||
1160 | return 1; | |
1161 | } | |
1162 | ||
1163 | /** | |
1164 | * nfree_current - Returns current number of AIOs that are free | |
1165 | * | |
1166 | * Will wait for available ones... | |
1167 | * | |
1168 | * Returns 0 if we have some condition that causes us to exit | |
1169 | */ | |
1170 | static int nfree_current(struct thr_info *tip) | |
1171 | { | |
1172 | int nfree = 0; | |
1173 | ||
1174 | pthread_mutex_lock(&tip->mutex); | |
1175 | while (!is_send_done(tip) && ((nfree = tip->naios_free) == 0)) { | |
1176 | tip->send_wait = 1; | |
1177 | if (pthread_cond_wait(&tip->cond, &tip->mutex)) { | |
1178 | fatal("pthread_cond_wait", ERR_SYSCALL, | |
1179 | "nfree_current cond wait failed\n"); | |
1180 | /*NOTREACHED*/ | |
1181 | } | |
1182 | } | |
1183 | pthread_mutex_unlock(&tip->mutex); | |
1184 | ||
1185 | return nfree; | |
1186 | } | |
1187 | ||
1188 | /** | |
1189 | * stall - Stall for the number of nanoseconds requested | |
1190 | * | |
1191 | * We may be late, in which case we just return. | |
1192 | */ | |
1193 | static void stall(struct thr_info *tip, long long oclock) | |
1194 | { | |
1195 | struct timespec req; | |
1196 | long long dreal, tclock = gettime() - rgenesis; | |
1197 | ||
4a7968cc LU |
1198 | oclock /= acc_factor; |
1199 | ||
d47a3fec AB |
1200 | if (verbose > 1) |
1201 | fprintf(tip->vfp, " stall(%lld.%09lld, %lld.%09lld)\n", | |
1202 | du64_to_sec(oclock), du64_to_nsec(oclock), | |
1203 | du64_to_sec(tclock), du64_to_nsec(tclock)); | |
1204 | ||
1205 | while (!is_send_done(tip) && tclock < oclock) { | |
1206 | dreal = oclock - tclock; | |
1207 | req.tv_sec = dreal / (1000 * 1000 * 1000); | |
1208 | req.tv_nsec = dreal % (1000 * 1000 * 1000); | |
1209 | ||
1210 | if (verbose > 1) { | |
1211 | fprintf(tip->vfp, "++ stall(%lld.%09lld) ++\n", | |
1212 | (long long)req.tv_sec, | |
1213 | (long long)req.tv_nsec); | |
1214 | } | |
1215 | ||
1216 | if (nanosleep(&req, NULL) < 0 && signal_done) | |
1217 | break; | |
1218 | ||
1219 | tclock = gettime() - rgenesis; | |
1220 | } | |
1221 | } | |
1222 | ||
1223 | /** | |
1224 | * iocbs_map - Map a set of AIOs onto a set of IOCBs | |
1225 | * @tip: Per-thread information | |
1226 | * @list: List of AIOs created | |
1227 | * @pkts: AIOs to map | |
1228 | * @ntodo: Number of AIOs to map | |
1229 | */ | |
1230 | static void iocbs_map(struct thr_info *tip, struct iocb **list, | |
1231 | struct io_pkt *pkts, int ntodo) | |
1232 | { | |
1233 | int i; | |
1234 | struct io_pkt *pkt; | |
1235 | ||
1236 | assert(0 < ntodo && ntodo <= naios); | |
1237 | ||
1238 | pthread_mutex_lock(&tip->mutex); | |
1239 | assert(ntodo <= list_len(&tip->free_iocbs)); | |
1240 | for (i = 0, pkt = pkts; i < ntodo; i++, pkt++) { | |
1241 | __u32 rw = pkt->rw; | |
1242 | struct iocb_pkt *iocbp; | |
1243 | ||
1244 | if (!pkt->rw && !write_enabled) | |
1245 | rw = 1; | |
1246 | ||
1247 | if (verbose > 1) | |
1248 | fprintf(tip->vfp, "\t%10llu + %10llu %c%c\n", | |
1249 | (unsigned long long)pkt->sector, | |
1250 | (unsigned long long)pkt->nbytes / nb_sec, | |
1251 | rw ? 'R' : 'W', | |
1252 | (rw == 1 && pkt->rw == 0) ? '!' : ' '); | |
1253 | ||
1254 | iocbp = list_entry(tip->free_iocbs.next, struct iocb_pkt, head); | |
1255 | iocb_setup(iocbp, rw, pkt->nbytes, pkt->sector * nb_sec); | |
1256 | ||
1257 | list_move_tail(&iocbp->head, &tip->used_iocbs); | |
1258 | list[i] = &iocbp->iocb; | |
1259 | } | |
1260 | ||
1261 | tip->naios_free -= ntodo; | |
1262 | assert(tip->naios_free >= 0); | |
1263 | pthread_mutex_unlock(&tip->mutex); | |
1264 | } | |
1265 | ||
1266 | /** | |
1267 | * process_bunch - Process a bunch of requests | |
1268 | * @tip: Per-thread information | |
1269 | * @bunch: Bunch to process | |
1270 | */ | |
1271 | static void process_bunch(struct thr_info *tip, struct io_bunch *bunch) | |
1272 | { | |
1273 | __u64 i = 0; | |
1274 | struct iocb *list[bunch->hdr.npkts]; | |
1275 | ||
1276 | assert(0 < bunch->hdr.npkts && bunch->hdr.npkts <= BT_MAX_PKTS); | |
1277 | while (!is_send_done(tip) && (i < bunch->hdr.npkts)) { | |
1278 | long ndone; | |
1279 | int ntodo = min(nfree_current(tip), bunch->hdr.npkts - i); | |
1280 | ||
1281 | assert(0 < ntodo && ntodo <= naios); | |
1282 | iocbs_map(tip, list, &bunch->pkts[i], ntodo); | |
1283 | if (!no_stalls) | |
1284 | stall(tip, bunch->hdr.time_stamp - genesis); | |
1285 | ||
1286 | if (ntodo) { | |
1287 | if (verbose > 1) | |
1288 | fprintf(tip->vfp, "submit(%d)\n", ntodo); | |
1289 | ndone = io_submit(tip->ctx, ntodo, list); | |
1290 | if (ndone != (long)ntodo) { | |
1291 | fatal("io_submit", ERR_SYSCALL, | |
1292 | "%d: io_submit(%d:%ld) failed (%s)\n", | |
1293 | tip->cpu, ntodo, ndone, | |
1294 | strerror(labs(ndone))); | |
1295 | /*NOTREACHED*/ | |
1296 | } | |
1297 | ||
1298 | pthread_mutex_lock(&tip->mutex); | |
1299 | tip->naios_out += ndone; | |
1300 | assert(tip->naios_out <= naios); | |
1301 | if (tip->reap_wait) { | |
1302 | tip->reap_wait = 0; | |
1303 | pthread_cond_signal(&tip->cond); | |
1304 | } | |
1305 | pthread_mutex_unlock(&tip->mutex); | |
1306 | ||
1307 | i += ndone; | |
1308 | assert(i <= bunch->hdr.npkts); | |
1309 | } | |
1310 | } | |
1311 | } | |
1312 | ||
1313 | /** | |
1314 | * reset_input_file - Reset the input file for the next iteration | |
1315 | * @tip: Thread information | |
1316 | * | |
1317 | * We also do a dummy read of the file header to get us to the first bunch. | |
1318 | */ | |
1319 | static void reset_input_file(struct thr_info *tip) | |
1320 | { | |
1321 | struct io_file_hdr hdr; | |
1322 | ||
1323 | lseek(tip->ifd, 0, 0); | |
1324 | ||
1325 | if (read(tip->ifd, &hdr, sizeof(hdr)) != sizeof(hdr)) { | |
1326 | fatal(tip->file_name, ERR_ARGS, "Header reread failed\n"); | |
1327 | /*NOTREACHED*/ | |
1328 | } | |
1329 | } | |
1330 | ||
1331 | /** | |
1332 | * replay_sub - Worker thread to submit AIOs that are being replayed | |
1333 | */ | |
1334 | static void *replay_sub(void *arg) | |
1335 | { | |
a788dfde | 1336 | unsigned int i; |
e41bf0ff | 1337 | char *mdev; |
d47a3fec AB |
1338 | char path[MAXPATHLEN]; |
1339 | struct io_bunch bunch; | |
1340 | struct thr_info *tip = arg; | |
358504bb | 1341 | int oflags; |
d47a3fec AB |
1342 | |
1343 | pin_to_cpu(tip); | |
1344 | ||
e41bf0ff ES |
1345 | mdev = map_dev(tip->devnm); |
1346 | sprintf(path, "/dev/%s", mdev); | |
1347 | /* | |
1348 | * convert underscores to slashes to | |
1349 | * restore device names that have larger paths | |
1350 | */ | |
1351 | for (i = 0; i < strlen(mdev); i++) | |
1352 | if (path[strlen("/dev/") + i] == '_') | |
1353 | path[strlen("/dev/") + i] = '/'; | |
358504bb JA |
1354 | #ifdef O_NOATIME |
1355 | oflags = O_NOATIME; | |
1356 | #else | |
1357 | oflags = 0; | |
1358 | #endif | |
1359 | tip->ofd = open(path, O_RDWR | O_DIRECT | oflags); | |
d47a3fec AB |
1360 | if (tip->ofd < 0) { |
1361 | fatal(path, ERR_SYSCALL, "Failed device open\n"); | |
1362 | /*NOTREACHED*/ | |
1363 | } | |
1364 | ||
1365 | set_replay_ready(); | |
1366 | while (!is_send_done(tip) && tip->iterations--) { | |
1367 | wait_iter_start(); | |
cbb3e69e | 1368 | if (verbose > 1) |
d47a3fec AB |
1369 | fprintf(tip->vfp, "\n=== %d ===\n", tip->iterations); |
1370 | while (!is_send_done(tip) && next_bunch(tip, &bunch)) | |
1371 | process_bunch(tip, &bunch); | |
1372 | set_iter_done(); | |
1373 | reset_input_file(tip); | |
1374 | } | |
1375 | tip->send_done = 1; | |
1376 | set_replay_done(); | |
1377 | ||
1378 | return NULL; | |
1379 | } | |
1380 | ||
1381 | /* | |
1382 | * ======================================================================== | |
1383 | * ==== COMMAND LINE ARGUMENT HANDLING ==================================== | |
1384 | * ======================================================================== | |
1385 | */ | |
1386 | ||
/*
 * Option summary printed by usage().  Keep in step with S_OPTS and
 * l_opts[]: an option that takes an argument shows it here.
 */
static char usage_str[] = \
	"\n" \
	"\t[ -c <cpus> : --cpus=<cpus> ] Default: 1\n" \
	"\t[ -d <dir> : --input-directory=<dir> ] Default: .\n" \
	"\t[ -F : --find-records ] Default: Off\n" \
	"\t[ -h : --help ] Default: Off\n" \
	"\t[ -i <base> : --input-base=<base> ] Default: replay\n" \
	"\t[ -I <iters>: --iterations=<iters> ] Default: 1\n" \
	"\t[ -M <file> : --map-devs=<file> ] Default: None\n" \
	"\t[ -N : --no-stalls ] Default: Off\n" \
	"\t[ -x <factor>: --acc-factor=<factor> ] Default: 1\n" \
	"\t[ -v : --verbose ] Default: Off\n" \
	"\t[ -V : --version ] Default: Off\n" \
	"\t[ -W : --write-enable ] Default: Off\n" \
	"\t<dev...> Default: None\n" \
	"\n";
1403 | ||
/*
 * Short options handed to getopt_long(); a trailing ':' means the option
 * takes an argument.  Every letter here has a matching entry in l_opts[]
 * and a case in handle_args().
 */
#define S_OPTS	"c:d:Fhi:I:M:Nx:vVW"

/* Long options; each maps onto the short option letter in .val. */
static struct option l_opts[] = {
	{ .name = "cpus",            .has_arg = required_argument, .flag = NULL, .val = 'c' },
	{ .name = "input-directory", .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "find-records",    .has_arg = no_argument,       .flag = NULL, .val = 'F' },
	{ .name = "help",            .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "input-base",      .has_arg = required_argument, .flag = NULL, .val = 'i' },
	{ .name = "iterations",      .has_arg = required_argument, .flag = NULL, .val = 'I' },
	{ .name = "map-devs",        .has_arg = required_argument, .flag = NULL, .val = 'M' },
	{ .name = "no-stalls",       .has_arg = no_argument,       .flag = NULL, .val = 'N' },
	{ .name = "acc-factor",      .has_arg = required_argument, .flag = NULL, .val = 'x' },
	{ .name = "verbose",         .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "version",         .has_arg = no_argument,       .flag = NULL, .val = 'V' },
	{ .name = "write-enable",    .has_arg = no_argument,       .flag = NULL, .val = 'W' },
	{ .name = NULL }	/* terminator */
};
1482 | ||
1483 | /** | |
1484 | * handle_args: Parse passed in argument list | |
1485 | * @argc: Number of arguments in argv | |
1486 | * @argv: Arguments passed in | |
1487 | * | |
1488 | * Does rudimentary parameter verification as well. | |
1489 | */ | |
1490 | static void handle_args(int argc, char *argv[]) | |
1491 | { | |
1492 | int c; | |
4a7968cc | 1493 | int r; |
d47a3fec AB |
1494 | |
1495 | while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) { | |
1496 | switch (c) { | |
1497 | case 'c': | |
1498 | cpus_to_use = atoi(optarg); | |
1499 | if (cpus_to_use <= 0 || cpus_to_use > ncpus) { | |
1500 | fatal(NULL, ERR_ARGS, | |
1501 | "Invalid number of cpus %d (0<x<%d)\n", | |
1502 | cpus_to_use, ncpus); | |
1503 | /*NOTREACHED*/ | |
1504 | } | |
1505 | break; | |
1506 | ||
1507 | case 'd': | |
1508 | idir = optarg; | |
1509 | if (access(idir, R_OK | X_OK) != 0) { | |
1510 | fatal(idir, ERR_ARGS, | |
1511 | "Invalid input directory specified\n"); | |
1512 | /*NOTREACHED*/ | |
1513 | } | |
1514 | break; | |
1515 | ||
1516 | case 'F': | |
1517 | find_records = 1; | |
1518 | break; | |
1519 | ||
1520 | case 'h': | |
1521 | usage(); | |
1522 | exit(0); | |
1523 | /*NOTREACHED*/ | |
1524 | ||
1525 | case 'i': | |
1526 | ibase = optarg; | |
1527 | break; | |
1528 | ||
1529 | case 'I': | |
1530 | def_iterations = atoi(optarg); | |
1531 | if (def_iterations <= 0) { | |
1532 | fprintf(stderr, | |
1533 | "Invalid number of iterations %d\n", | |
1534 | def_iterations); | |
1535 | exit(ERR_ARGS); | |
1536 | /*NOTREACHED*/ | |
1537 | } | |
1538 | break; | |
1539 | ||
1540 | case 'M': | |
1541 | read_map_devs(optarg); | |
1542 | break; | |
1543 | ||
1544 | case 'N': | |
1545 | no_stalls = 1; | |
1546 | break; | |
1547 | ||
4a7968cc LU |
1548 | case 'x': |
1549 | r = sscanf(optarg,"%u",&acc_factor); | |
1550 | if (r!=1) { | |
1551 | fprintf(stderr, | |
1552 | "Invalid acceleration factor\n"); | |
1553 | exit(ERR_ARGS); | |
1554 | /*NOTREACHED*/ | |
1555 | } | |
1556 | break; | |
1557 | ||
d47a3fec AB |
1558 | case 'V': |
1559 | fprintf(stderr, "btreplay -- version %s\n", | |
1560 | my_btversion); | |
1561 | fprintf(stderr, " Built on %s\n", | |
1562 | build_date); | |
1563 | exit(0); | |
1564 | /*NOTREACHED*/ | |
1565 | ||
1566 | case 'v': | |
1567 | verbose++; | |
1568 | break; | |
1569 | ||
1570 | case 'W': | |
1571 | write_enabled = 1; | |
1572 | break; | |
1573 | ||
1574 | default: | |
1575 | usage(); | |
1576 | fatal(NULL, ERR_ARGS, | |
1577 | "Invalid command line argument %c\n", c); | |
1578 | /*NOTREACHED*/ | |
1579 | } | |
1580 | } | |
1581 | ||
1582 | while (optind < argc) | |
1583 | add_input_dev(argv[optind++]); | |
1584 | ||
1585 | if (find_records) | |
1586 | find_input_devs(idir); | |
1587 | ||
1588 | if (list_len(&input_devs) == 0) { | |
1589 | fatal(NULL, ERR_ARGS, "Missing required input dev name(s)\n"); | |
1590 | /*NOTREACHED*/ | |
1591 | } | |
1592 | ||
1593 | if (cpus_to_use < 0) | |
1594 | cpus_to_use = ncpus; | |
1595 | } | |
1596 | ||
1597 | /* | |
1598 | * ======================================================================== | |
1599 | * ==== MAIN ROUTINE ====================================================== | |
1600 | * ======================================================================== | |
1601 | */ | |
1602 | ||
1603 | /** | |
1604 | * set_signal_done - Signal handler, catches signals & sets signal_done | |
1605 | */ | |
1606 | static void set_signal_done(__attribute__((__unused__))int signum) | |
1607 | { | |
1608 | signal_done = 1; | |
1609 | } | |
1610 | ||
1611 | /** | |
1612 | * main - | |
1613 | * @argc: Number of arguments | |
1614 | * @argv: Array of arguments | |
1615 | */ | |
1616 | int main(int argc, char *argv[]) | |
1617 | { | |
1618 | int i; | |
1619 | struct list_head *p; | |
1620 | ||
1621 | pgsize = getpagesize(); | |
1622 | assert(pgsize > 0); | |
1623 | ||
1624 | setup_signal(SIGINT, set_signal_done); | |
1625 | setup_signal(SIGTERM, set_signal_done); | |
1626 | ||
1627 | get_ncpus(); | |
1628 | handle_args(argc, argv); | |
1629 | find_input_files(); | |
1630 | ||
1631 | nfiles = list_len(&input_files); | |
1632 | __list_for_each(p, &input_files) { | |
1633 | tip_init(list_entry(p, struct thr_info, head)); | |
1634 | } | |
1635 | ||
1636 | wait_replays_ready(); | |
1637 | for (i = 0; i < def_iterations; i++) { | |
1638 | rgenesis = gettime(); | |
1639 | start_iter(); | |
1640 | if (verbose) | |
1641 | fprintf(stderr, "I"); | |
1642 | wait_iters_done(); | |
1643 | } | |
1644 | ||
1645 | wait_replays_done(); | |
1646 | wait_reclaims_done(); | |
1647 | ||
1648 | if (verbose) | |
1649 | fprintf(stderr, "\n"); | |
1650 | ||
1651 | rem_input_files(); | |
1652 | release_map_devs(); | |
1653 | ||
1654 | return 0; | |
1655 | } |