Commit | Line | Data |
---|---|---|
d0ca268b JA |
1 | /* |
2 | * block queue tracing application | |
3 | * | |
d956a2cd JA |
4 | * Copyright (C) 2005 Jens Axboe <axboe@suse.de> |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
19 | * | |
d0ca268b JA |
20 | */ |
21 | #include <pthread.h> | |
22 | #include <sys/types.h> | |
23 | #include <sys/stat.h> | |
24 | #include <unistd.h> | |
25 | #include <locale.h> | |
26 | #include <signal.h> | |
27 | #include <fcntl.h> | |
28 | #include <string.h> | |
29 | #include <sys/ioctl.h> | |
b9d4294e | 30 | #include <sys/param.h> |
d0ca268b JA |
31 | #include <stdio.h> |
32 | #include <stdlib.h> | |
33 | #include <sched.h> | |
d39c04ca AB |
34 | #include <ctype.h> |
35 | #include <getopt.h> | |
d0ca268b JA |
36 | |
37 | #include "blktrace.h" | |
38 | ||
/* buf_size value handed to the kernel in the BLKSTARTTRACE setup. */
#define BUF_SIZE	(128 *1024)
/* buf_nr value handed to the kernel in the BLKSTARTTRACE setup. */
#define BUF_NR		(4)

/*
 * Build one struct mask_map initializer from a BLK_TC_* suffix: the mask
 * value, the bare suffix ("READ") and the full name ("BLK_TC_READ").
 */
#define DECLARE_MASK_MAP(mask)	{ BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* Non-zero when str equals either spelling of the entry, ignoring case. */
#define COMPARE_MASK_MAP(mmp, str)					\
	(strcasecmp((mmp)->short_form, (str)) == 0 ||			\
	 strcasecmp((mmp)->long_form, (str)) == 0)

/* Non-zero when x is at least 1 and below (1 << BLK_TC_SHIFT). */
#define VALID_SET(x)	(((x) >= 1) && ((x) < (1 << BLK_TC_SHIFT)))
48 | ||
/*
 * One named trace category: a mask value together with the two spellings
 * under which it can be looked up.
 */
struct mask_map {
	int mask;		/* value returned on a successful lookup */
	char *short_form;	/* suffix only, e.g. "READ" */
	char *long_form;	/* prefixed form, e.g. "BLK_TC_READ" */
};
54 | ||
55 | struct mask_map mask_maps[] = { | |
5c86134e JA |
56 | DECLARE_MASK_MAP(READ), |
57 | DECLARE_MASK_MAP(WRITE), | |
58 | DECLARE_MASK_MAP(BARRIER), | |
59 | DECLARE_MASK_MAP(SYNC), | |
60 | DECLARE_MASK_MAP(QUEUE), | |
61 | DECLARE_MASK_MAP(REQUEUE), | |
62 | DECLARE_MASK_MAP(ISSUE), | |
63 | DECLARE_MASK_MAP(COMPLETE), | |
64 | DECLARE_MASK_MAP(FS), | |
65 | DECLARE_MASK_MAP(PC), | |
d39c04ca AB |
66 | }; |
67 | ||
/* Short options; a trailing ':' marks an option that takes an argument. */
#define S_OPTS	"d:a:A:r:o:k"

/*
 * Long options.  Each entry returns the matching short option character
 * (flag is NULL), so main() handles both forms in one switch.
 */
static struct option l_opts[] = {
	{ "dev",	required_argument,	NULL,	'd' },
	{ "act-mask",	required_argument,	NULL,	'a' },
	{ "set-mask",	required_argument,	NULL,	'A' },
	{ "relay",	required_argument,	NULL,	'r' },
	{ "output",	required_argument,	NULL,	'o' },
	{ "kill",	no_argument,		NULL,	'k' },
	{ NULL,		0,			NULL,	0   }	/* terminator */
};
113 | ||
/* Per-CPU worker state; one entry per extract() thread. */
struct thread_information {
	int cpu;			/* CPU index this worker serves */
	pthread_t thread;		/* handle filled in by pthread_create() */

	int fd;				/* input: the per-CPU trace file */
	char fn[MAXPATHLEN + 64];	/* path that fd was opened from */

	pthread_mutex_t *fd_lock;	/* NULL, or lock guarding a shared ofd */
	int ofd;			/* output descriptor */

	unsigned long events_processed;	/* traces written out so far */
};
126 | ||
/* Set by the signal handler; polled by main() and every worker thread. */
static volatile int done;
#define is_done()	(*(volatile int *)(&done))

/* Command-line selections. */
static char *relay_path;		/* -r, or the built-in default */
static char *dev;			/* -d or trailing argument */
static char *output_name;		/* -o, or the kernel-assigned trace name */
static int act_mask = ~0U;		/* categories to trace; default is all bits */
static int kill_running_trace;		/* -k: only stop a trace, then exit */

/* Run-time state. */
static int devfd;			/* open descriptor for dev */
static int ncpus;			/* number of workers / table entries */
static struct thread_information *thread_information;
static char *buts_name_p;		/* copy of the name filled in by BLKSTARTTRACE */
static int trace_started;		/* non-zero once BLKSTARTTRACE succeeded */

/* Serialises writers when all workers share stdout. */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
144 | ||
1f79c4a0 | 145 | static int find_mask_map(char *string) |
d39c04ca | 146 | { |
5c86134e JA |
147 | int i; |
148 | ||
149 | for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++) | |
75da3c6a | 150 | if (COMPARE_MASK_MAP(&mask_maps[i], string)) |
5c86134e | 151 | return mask_maps[i].mask; |
d39c04ca | 152 | |
d39c04ca AB |
153 | return -1; |
154 | } | |
d0ca268b | 155 | |
3aabcd89 | 156 | static int start_trace(char *dev) |
d0ca268b JA |
157 | { |
158 | struct blk_user_trace_setup buts; | |
159 | ||
1f79c4a0 | 160 | memset(&buts, 0, sizeof(buts)); |
d0ca268b JA |
161 | buts.buf_size = BUF_SIZE; |
162 | buts.buf_nr = BUF_NR; | |
d39c04ca | 163 | buts.act_mask = act_mask; |
d0ca268b | 164 | |
d0ca268b JA |
165 | if (ioctl(devfd, BLKSTARTTRACE, &buts) < 0) { |
166 | perror("BLKSTARTTRACE"); | |
167 | return 1; | |
168 | } | |
169 | ||
830fd65c | 170 | trace_started = 1; |
d0ca268b JA |
171 | buts_name_p = strdup(buts.name); |
172 | return 0; | |
173 | } | |
174 | ||
3aabcd89 | 175 | static void stop_trace(void) |
d0ca268b | 176 | { |
bc39777c | 177 | if (trace_started || kill_running_trace) { |
707b0914 JA |
178 | if (ioctl(devfd, BLKSTOPTRACE) < 0) |
179 | perror("BLKSTOPTRACE"); | |
d0ca268b | 180 | |
707b0914 JA |
181 | trace_started = 0; |
182 | } | |
d0ca268b JA |
183 | } |
184 | ||
69e65a9e | 185 | static void *extract_data(struct thread_information *tip, char *ofn, int nb) |
d0ca268b JA |
186 | { |
187 | int ret, bytes_left; | |
87b72777 | 188 | unsigned char *buf, *p; |
d0ca268b | 189 | |
87b72777 | 190 | buf = malloc(nb); |
d0ca268b JA |
191 | p = buf; |
192 | bytes_left = nb; | |
193 | while (bytes_left > 0) { | |
b9d4294e | 194 | ret = read(tip->fd, p, bytes_left); |
3aabcd89 JA |
195 | if (!ret) |
196 | usleep(1000); | |
197 | else if (ret < 0) { | |
b9d4294e | 198 | perror(tip->fn); |
d0ca268b | 199 | fprintf(stderr, "Thread %d extract_data %s failed\n", |
b9d4294e | 200 | tip->cpu, tip->fn); |
87b72777 | 201 | free(buf); |
76718bcd | 202 | exit_trace(1); |
69e65a9e | 203 | return NULL; |
3aabcd89 | 204 | } else { |
d0ca268b JA |
205 | p += ret; |
206 | bytes_left -= ret; | |
207 | } | |
208 | } | |
209 | ||
69e65a9e | 210 | return buf; |
d0ca268b JA |
211 | } |
212 | ||
d5396421 JA |
213 | static inline void tip_fd_unlock(struct thread_information *tip) |
214 | { | |
215 | if (tip->fd_lock) | |
216 | pthread_mutex_unlock(tip->fd_lock); | |
217 | } | |
218 | ||
219 | static inline void tip_fd_lock(struct thread_information *tip) | |
220 | { | |
221 | if (tip->fd_lock) | |
222 | pthread_mutex_lock(tip->fd_lock); | |
223 | } | |
224 | ||
3aabcd89 | 225 | static void *extract(void *arg) |
d0ca268b JA |
226 | { |
227 | struct thread_information *tip = arg; | |
d5396421 | 228 | int ret, pdu_len; |
69e65a9e | 229 | char dp[64], *pdu_data; |
d0ca268b JA |
230 | struct blk_io_trace t; |
231 | pid_t pid = getpid(); | |
232 | cpu_set_t cpu_mask; | |
233 | ||
234 | CPU_ZERO(&cpu_mask); | |
b9d4294e | 235 | CPU_SET((tip->cpu), &cpu_mask); |
d0ca268b JA |
236 | |
237 | if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) { | |
238 | perror("sched_setaffinity"); | |
76718bcd | 239 | exit_trace(1); |
d0ca268b JA |
240 | } |
241 | ||
b9d4294e JA |
242 | snprintf(tip->fn, sizeof(tip->fn), |
243 | "%s/block/%s/trace%d", relay_path, buts_name_p, tip->cpu); | |
244 | tip->fd = open(tip->fn, O_RDONLY); | |
245 | if (tip->fd < 0) { | |
246 | perror(tip->fn); | |
5c86134e JA |
247 | fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu, |
248 | tip->fn); | |
76718bcd | 249 | exit_trace(1); |
d0ca268b JA |
250 | } |
251 | ||
69e65a9e | 252 | pdu_data = NULL; |
d0ca268b | 253 | while (!is_done()) { |
b9d4294e | 254 | ret = read(tip->fd, &t, sizeof(t)); |
d0ca268b JA |
255 | if (ret != sizeof(t)) { |
256 | if (ret < 0) { | |
b9d4294e | 257 | perror(tip->fn); |
d0ca268b | 258 | fprintf(stderr,"Thread %d failed read of %s\n", |
b9d4294e | 259 | tip->cpu, tip->fn); |
76718bcd | 260 | exit_trace(1); |
d0ca268b | 261 | } else if (ret > 0) { |
8fc0abbc | 262 | fprintf(stderr,"Thread %d misread %s %d,%d\n", |
b9d4294e | 263 | tip->cpu, tip->fn, ret, (int)sizeof(t)); |
76718bcd | 264 | exit_trace(1); |
d0ca268b JA |
265 | } else { |
266 | usleep(10000); | |
267 | continue; | |
268 | } | |
269 | } | |
270 | ||
271 | if (verify_trace(&t)) | |
76718bcd | 272 | exit_trace(1); |
d0ca268b | 273 | |
18ada3d4 JA |
274 | pdu_len = t.pdu_len; |
275 | ||
6fe4709e JA |
276 | trace_to_be(&t); |
277 | ||
69e65a9e JA |
278 | if (pdu_len) |
279 | pdu_data = extract_data(tip, dp, pdu_len); | |
280 | ||
281 | /* | |
282 | * now we have both trace and payload, get a lock on the | |
283 | * output descriptor and send it off | |
284 | */ | |
d5396421 JA |
285 | tip_fd_lock(tip); |
286 | ||
287 | ret = write(tip->ofd, &t, sizeof(t)); | |
d0ca268b | 288 | if (ret < 0) { |
d5396421 JA |
289 | fprintf(stderr,"Thread %d failed write\n", tip->cpu); |
290 | tip_fd_unlock(tip); | |
76718bcd | 291 | exit_trace(1); |
d0ca268b JA |
292 | } |
293 | ||
69e65a9e JA |
294 | if (pdu_data) { |
295 | ret = write(tip->ofd, pdu_data, pdu_len); | |
296 | if (ret != pdu_len) { | |
297 | perror("write pdu data"); | |
298 | exit_trace(1); | |
299 | } | |
d5396421 | 300 | |
69e65a9e JA |
301 | free(pdu_data); |
302 | pdu_data = NULL; | |
303 | } | |
87b72777 | 304 | |
69e65a9e | 305 | tip_fd_unlock(tip); |
d0ca268b JA |
306 | tip->events_processed++; |
307 | } | |
308 | ||
309 | return NULL; | |
310 | } | |
311 | ||
3aabcd89 | 312 | static int start_threads(void) |
d0ca268b JA |
313 | { |
314 | struct thread_information *tip; | |
d5396421 | 315 | char op[64]; |
d0ca268b JA |
316 | int i; |
317 | ||
318 | ncpus = sysconf(_SC_NPROCESSORS_ONLN); | |
319 | if (ncpus < 0) { | |
320 | fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n"); | |
1f79c4a0 | 321 | return 0; |
d0ca268b | 322 | } |
d0ca268b JA |
323 | |
324 | thread_information = malloc(ncpus * sizeof(struct thread_information)); | |
325 | for (i = 0, tip = thread_information; i < ncpus; i++, tip++) { | |
d5396421 | 326 | tip->fd_lock = NULL; |
d0ca268b JA |
327 | tip->cpu = i; |
328 | tip->events_processed = 0; | |
329 | ||
d5396421 | 330 | if (!strcmp(output_name, "-")) { |
1f79c4a0 | 331 | tip->ofd = dup(STDOUT_FILENO); |
d5396421 JA |
332 | tip->fd_lock = &stdout_mutex; |
333 | } else { | |
334 | sprintf(op, "%s_out.%d", output_name, tip->cpu); | |
335 | tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644); | |
336 | } | |
337 | ||
338 | if (tip->ofd < 0) { | |
339 | perror(op); | |
1f79c4a0 | 340 | return 0; |
d5396421 JA |
341 | } |
342 | ||
d0ca268b JA |
343 | if (pthread_create(&tip->thread, NULL, extract, tip)) { |
344 | perror( "pthread_create"); | |
345 | return 0; | |
346 | } | |
347 | } | |
348 | ||
349 | return ncpus; | |
350 | } | |
351 | ||
72ca8801 NS |
352 | static void close_thread(struct thread_information *tip) |
353 | { | |
354 | if (tip->fd != -1) | |
355 | close(tip->fd); | |
356 | if (tip->ofd != -1) | |
357 | close(tip->ofd); | |
358 | tip->fd = tip->ofd = -1; | |
359 | } | |
360 | ||
3aabcd89 JA |
361 | static void stop_threads(void) |
362 | { | |
363 | struct thread_information *tip = thread_information; | |
364 | int i; | |
365 | ||
366 | for (i = 0; i < ncpus; i++, tip++) { | |
9267405a | 367 | long ret; |
3aabcd89 JA |
368 | |
369 | if (pthread_join(tip->thread, (void *) &ret)) | |
370 | perror("thread_join"); | |
72ca8801 | 371 | close_thread(tip); |
3aabcd89 JA |
372 | } |
373 | } | |
374 | ||
72ca8801 NS |
375 | static void stop_tracing(void) |
376 | { | |
377 | struct thread_information *tip = thread_information; | |
378 | int i; | |
379 | ||
380 | for (i = 0; i < ncpus; i++, tip++) | |
381 | close_thread(tip); | |
382 | stop_trace(); | |
383 | } | |
384 | ||
/*
 * Tear down tracing and terminate the process with the given exit status.
 * Does not return.
 */
static void exit_trace(int status)
{
	stop_tracing();

	exit(status);
}
390 | ||
1f79c4a0 | 391 | static void show_stats(void) |
d0ca268b JA |
392 | { |
393 | int i; | |
394 | struct thread_information *tip; | |
395 | unsigned long events_processed = 0; | |
396 | ||
d5396421 JA |
397 | if (!strcmp(output_name, "-")) |
398 | return; | |
399 | ||
d0ca268b JA |
400 | for (i = 0, tip = thread_information; i < ncpus; i++, tip++) { |
401 | printf("CPU%3d: %20ld events\n", | |
402 | tip->cpu, tip->events_processed); | |
403 | events_processed += tip->events_processed; | |
404 | } | |
405 | ||
406 | printf("Total: %20ld events\n", events_processed); | |
407 | } | |
408 | ||
/* Print a one-line usage summary for program on stderr. */
static void show_usage(char *program)
{
	static const char usage_fmt[] =
		"Usage: %s [-d <dev>] [-a <trace> [-a <trace>]] <dev>\n";

	fprintf(stderr, usage_fmt, program);
}
415 | ||
1f79c4a0 | 416 | static void handle_sigint(int sig) |
d0ca268b | 417 | { |
d0ca268b JA |
418 | done = 1; |
419 | } | |
420 | ||
421 | int main(int argc, char *argv[]) | |
422 | { | |
5270dddd | 423 | static char default_relay_path[] = "/relay"; |
d0ca268b | 424 | struct stat st; |
d39c04ca AB |
425 | int i, c; |
426 | int act_mask_tmp = 0; | |
427 | ||
428 | while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) { | |
429 | switch (c) { | |
430 | case 'a': | |
431 | i = find_mask_map(optarg); | |
432 | if (i < 0) { | |
433 | fprintf(stderr,"Invalid action mask %s\n", | |
434 | optarg); | |
7425d456 | 435 | return 1; |
d39c04ca AB |
436 | } |
437 | act_mask_tmp |= i; | |
438 | break; | |
439 | ||
440 | case 'A': | |
441 | if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) { | |
442 | fprintf(stderr, | |
443 | "Invalid set action mask %s/0x%x\n", | |
444 | optarg, i); | |
7425d456 | 445 | return 1; |
d39c04ca AB |
446 | } |
447 | act_mask_tmp = i; | |
448 | break; | |
d0ca268b | 449 | |
d39c04ca | 450 | case 'd': |
ee1f4158 | 451 | dev = optarg; |
d39c04ca AB |
452 | break; |
453 | ||
5270dddd JA |
454 | case 'r': |
455 | relay_path = optarg; | |
456 | break; | |
457 | ||
d5396421 JA |
458 | case 'o': |
459 | output_name = strdup(optarg); | |
460 | break; | |
bc39777c JA |
461 | case 'k': |
462 | kill_running_trace = 1; | |
463 | break; | |
d5396421 | 464 | |
d39c04ca | 465 | default: |
ee1f4158 | 466 | show_usage(argv[0]); |
7425d456 | 467 | return 1; |
d39c04ca AB |
468 | } |
469 | } | |
470 | ||
ee1f4158 NS |
471 | while (optind < argc) |
472 | dev = argv[optind++]; | |
473 | ||
474 | if (dev == NULL) { | |
475 | show_usage(argv[0]); | |
7425d456 | 476 | return 1; |
d39c04ca AB |
477 | } |
478 | ||
5270dddd JA |
479 | if (!relay_path) |
480 | relay_path = default_relay_path; | |
481 | ||
d5396421 | 482 | if (act_mask_tmp != 0) |
d39c04ca | 483 | act_mask = act_mask_tmp; |
d0ca268b JA |
484 | |
485 | if (stat(relay_path, &st) < 0) { | |
d39c04ca | 486 | fprintf(stderr,"%s does not appear to be mounted\n", |
d0ca268b | 487 | relay_path); |
7425d456 | 488 | return 1; |
d0ca268b JA |
489 | } |
490 | ||
bc39777c JA |
491 | devfd = open(dev, O_RDONLY); |
492 | if (devfd < 0) { | |
493 | perror(dev); | |
7425d456 | 494 | return 1; |
bc39777c JA |
495 | } |
496 | ||
497 | if (kill_running_trace) { | |
498 | stop_trace(); | |
7425d456 | 499 | return 0; |
bc39777c JA |
500 | } |
501 | ||
d39c04ca | 502 | if (start_trace(dev)) { |
5270dddd | 503 | close(devfd); |
d39c04ca | 504 | fprintf(stderr, "Failed to start trace on %s\n", dev); |
7425d456 | 505 | return 1; |
d0ca268b JA |
506 | } |
507 | ||
508 | setlocale(LC_NUMERIC, "en_US"); | |
509 | ||
d5396421 JA |
510 | if (!output_name) |
511 | output_name = strdup(buts_name_p); | |
512 | ||
d0ca268b JA |
513 | i = start_threads(); |
514 | if (!i) { | |
515 | fprintf(stderr, "Failed to start worker threads\n"); | |
516 | stop_trace(); | |
7425d456 | 517 | return 1; |
d0ca268b JA |
518 | } |
519 | ||
d0ca268b JA |
520 | signal(SIGINT, handle_sigint); |
521 | signal(SIGHUP, handle_sigint); | |
522 | signal(SIGTERM, handle_sigint); | |
523 | ||
72ca8801 | 524 | atexit(stop_tracing); |
830fd65c | 525 | |
d0ca268b JA |
526 | while (!is_done()) |
527 | sleep(1); | |
528 | ||
b9d4294e | 529 | stop_threads(); |
3a9995b9 | 530 | stop_trace(); |
d0ca268b | 531 | show_stats(); |
bc39777c | 532 | close(devfd); |
d0ca268b JA |
533 | |
534 | return 0; | |
535 | } | |
536 |