[PATCH] blkparse: account skips for file reads as well
[blktrace.git] / blktrace.c
CommitLineData
d0ca268b
JA
1/*
2 * block queue tracing application
3 *
d956a2cd
JA
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
d0ca268b
JA
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
b9d4294e 30#include <sys/param.h>
e3e74029 31#include <sys/statfs.h>
d0ca268b
JA
32#include <stdio.h>
33#include <stdlib.h>
34#include <sched.h>
d39c04ca
AB
35#include <ctype.h>
36#include <getopt.h>
d0ca268b
JA
37
38#include "blktrace.h"
39
/* buf_size value handed to the BLKSTARTTRACE ioctl. */
#define BUF_SIZE	(128 * 1024)
/* buf_nr value handed to the BLKSTARTTRACE ioctl. */
#define BUF_NR		(4)

/* f_type that statfs() must report for the relay path to be accepted. */
#define RELAYFS_TYPE	0xF0B4A981

/*
 * Build one struct mask_map initializer for BLK_TC_<mask>: the numeric
 * value, the bare name ("READ") and the prefixed name ("BLK_TC_READ").
 */
#define DECLARE_MASK_MAP(mask)	{ BLK_TC_##mask, #mask, "BLK_TC_"#mask }

/* Non-zero when str equals either name of *mmp, ignoring case. */
#define COMPARE_MASK_MAP(mmp, str)				\
	(!strcasecmp((mmp)->short_form, (str)) ||		\
	 !strcasecmp((mmp)->long_form, (str)))

/* Non-zero when x lies in [1, 1 << BLK_TC_SHIFT). */
#define VALID_SET(x)	((1 <= (x)) && ((x) < (1 << BLK_TC_SHIFT)))
51
/* One action-mask category together with the two names it answers to. */
struct mask_map {
	int mask;		/* numeric mask value */
	char *short_form;	/* bare name, e.g. "READ" */
	char *long_form;	/* prefixed name, e.g. "BLK_TC_READ" */
};
57
58struct mask_map mask_maps[] = {
5c86134e
JA
59 DECLARE_MASK_MAP(READ),
60 DECLARE_MASK_MAP(WRITE),
61 DECLARE_MASK_MAP(BARRIER),
62 DECLARE_MASK_MAP(SYNC),
63 DECLARE_MASK_MAP(QUEUE),
64 DECLARE_MASK_MAP(REQUEUE),
65 DECLARE_MASK_MAP(ISSUE),
66 DECLARE_MASK_MAP(COMPLETE),
67 DECLARE_MASK_MAP(FS),
68 DECLARE_MASK_MAP(PC),
d39c04ca
AB
69};
70
/* Short-option string for getopt_long(); keep in step with l_opts[]. */
#define S_OPTS "d:a:A:r:o:kw:"

/*
 * Long options. Every entry returns its short-option character through
 * .val, so main() handles both spellings in one switch.
 */
static struct option l_opts[] = {
	{ .name = "dev",       .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "act-mask",  .has_arg = required_argument, .flag = NULL, .val = 'a' },
	{ .name = "set-mask",  .has_arg = required_argument, .flag = NULL, .val = 'A' },
	{ .name = "relay",     .has_arg = required_argument, .flag = NULL, .val = 'r' },
	{ .name = "output",    .has_arg = required_argument, .flag = NULL, .val = 'o' },
	{ .name = "kill",      .has_arg = no_argument,       .flag = NULL, .val = 'k' },
	{ .name = "stopwatch", .has_arg = required_argument, .flag = NULL, .val = 'w' },
	/* terminator required by getopt_long() */
	{ .name = NULL,        .has_arg = no_argument,       .flag = NULL, .val = 0 }
};
122
d0ca268b
JA
/* State of one extraction thread: one per (device, cpu) pair. */
struct thread_information {
	int cpu;			/* cpu index this thread serves */
	pthread_t thread;		/* handle from pthread_create() */

	int fd;				/* input: the per-cpu trace file */
	char fn[MAXPATHLEN + 64];	/* path that fd was opened from */

	pthread_mutex_t *fd_lock;	/* guards ofd when shared, else NULL */
	int ofd;			/* output descriptor */

	unsigned long events_processed;	/* traces written so far */
	struct device_information *device;	/* owning device */
};
136
e7c9f3ff
NS
/* State of one traced block device. */
struct device_information {
	int fd;				/* descriptor of the opened device node */
	char *path;			/* device path as given on the command line */
	char buts_name[32];		/* name returned by BLKSTARTTRACE */
	int trace_started;		/* non-zero once BLKSTARTTRACE succeeded */
	struct thread_information *threads;	/* ncpus entries for this device */
};
d0ca268b 144
/* Number of cpus; one extraction thread is started per cpu and device. */
static int ncpus;
/* Backing array for all threads: ncpus entries for each device. */
static struct thread_information *thread_information;
/* Number of entries in device_information[]. */
static int ndevs;
/* Devices named on the command line, grown by resize_devices(). */
static struct device_information *device_information;

/* command line option globals */
static char *relay_path;		/* -r: relay mount point */
static char *output_name;		/* -o: output base name, "-" for stdout */
static int act_mask = ~0U;		/* action mask sent to the kernel */
static int kill_running_trace;		/* -k: only stop traces, then exit */

/* Set by the signal handler; polled by main() and the worker threads. */
#define is_done()	(*(volatile int *)(&done))
static volatile int done;

/* Serializes writers when every thread writes to a dup of stdout. */
static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;

static void exit_trace(int status);
162
1f79c4a0 163static int find_mask_map(char *string)
d39c04ca 164{
5c86134e
JA
165 int i;
166
167 for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
75da3c6a 168 if (COMPARE_MASK_MAP(&mask_maps[i], string))
5c86134e 169 return mask_maps[i].mask;
d39c04ca 170
d39c04ca
AB
171 return -1;
172}
d0ca268b 173
e7c9f3ff 174static int start_trace(struct device_information *dip)
d0ca268b
JA
175{
176 struct blk_user_trace_setup buts;
177
1f79c4a0 178 memset(&buts, 0, sizeof(buts));
d0ca268b
JA
179 buts.buf_size = BUF_SIZE;
180 buts.buf_nr = BUF_NR;
d39c04ca 181 buts.act_mask = act_mask;
d0ca268b 182
e7c9f3ff 183 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
d0ca268b
JA
184 perror("BLKSTARTTRACE");
185 return 1;
186 }
187
e7c9f3ff
NS
188 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
189 dip->trace_started = 1;
d0ca268b
JA
190 return 0;
191}
192
e7c9f3ff 193static void stop_trace(struct device_information *dip)
d0ca268b 194{
e7c9f3ff
NS
195 if (dip->trace_started || kill_running_trace) {
196 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
707b0914 197 perror("BLKSTOPTRACE");
e7c9f3ff
NS
198 close(dip->fd);
199 dip->trace_started = 0;
707b0914 200 }
d0ca268b
JA
201}
202
e7c9f3ff
NS
203static void stop_all_traces(void)
204{
205 struct device_information *dip;
206 int i;
207
208 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
209 stop_trace(dip);
210}
211
69e65a9e 212static void *extract_data(struct thread_information *tip, char *ofn, int nb)
d0ca268b
JA
213{
214 int ret, bytes_left;
87b72777 215 unsigned char *buf, *p;
d0ca268b 216
87b72777 217 buf = malloc(nb);
d0ca268b
JA
218 p = buf;
219 bytes_left = nb;
220 while (bytes_left > 0) {
b9d4294e 221 ret = read(tip->fd, p, bytes_left);
3aabcd89
JA
222 if (!ret)
223 usleep(1000);
224 else if (ret < 0) {
b9d4294e 225 perror(tip->fn);
d0ca268b 226 fprintf(stderr, "Thread %d extract_data %s failed\n",
b9d4294e 227 tip->cpu, tip->fn);
87b72777 228 free(buf);
76718bcd 229 exit_trace(1);
69e65a9e 230 return NULL;
3aabcd89 231 } else {
d0ca268b
JA
232 p += ret;
233 bytes_left -= ret;
234 }
235 }
236
69e65a9e 237 return buf;
d0ca268b
JA
238}
239
d5396421
JA
240static inline void tip_fd_unlock(struct thread_information *tip)
241{
242 if (tip->fd_lock)
243 pthread_mutex_unlock(tip->fd_lock);
244}
245
246static inline void tip_fd_lock(struct thread_information *tip)
247{
248 if (tip->fd_lock)
249 pthread_mutex_lock(tip->fd_lock);
250}
251
3aabcd89 252static void *extract(void *arg)
d0ca268b
JA
253{
254 struct thread_information *tip = arg;
d5396421 255 int ret, pdu_len;
69e65a9e 256 char dp[64], *pdu_data;
d0ca268b
JA
257 struct blk_io_trace t;
258 pid_t pid = getpid();
259 cpu_set_t cpu_mask;
260
261 CPU_ZERO(&cpu_mask);
b9d4294e 262 CPU_SET((tip->cpu), &cpu_mask);
d0ca268b
JA
263
264 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
265 perror("sched_setaffinity");
76718bcd 266 exit_trace(1);
d0ca268b
JA
267 }
268
e7c9f3ff
NS
269 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
270 relay_path, tip->device->buts_name, tip->cpu);
b9d4294e
JA
271 tip->fd = open(tip->fn, O_RDONLY);
272 if (tip->fd < 0) {
273 perror(tip->fn);
5c86134e
JA
274 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
275 tip->fn);
76718bcd 276 exit_trace(1);
d0ca268b
JA
277 }
278
69e65a9e 279 pdu_data = NULL;
d0ca268b 280 while (!is_done()) {
b9d4294e 281 ret = read(tip->fd, &t, sizeof(t));
d0ca268b
JA
282 if (ret != sizeof(t)) {
283 if (ret < 0) {
b9d4294e 284 perror(tip->fn);
d0ca268b 285 fprintf(stderr,"Thread %d failed read of %s\n",
b9d4294e 286 tip->cpu, tip->fn);
76718bcd 287 exit_trace(1);
d0ca268b 288 } else if (ret > 0) {
8fc0abbc 289 fprintf(stderr,"Thread %d misread %s %d,%d\n",
b9d4294e 290 tip->cpu, tip->fn, ret, (int)sizeof(t));
76718bcd 291 exit_trace(1);
d0ca268b
JA
292 } else {
293 usleep(10000);
294 continue;
295 }
296 }
297
298 if (verify_trace(&t))
76718bcd 299 exit_trace(1);
d0ca268b 300
18ada3d4
JA
301 pdu_len = t.pdu_len;
302
6fe4709e
JA
303 trace_to_be(&t);
304
69e65a9e
JA
305 if (pdu_len)
306 pdu_data = extract_data(tip, dp, pdu_len);
307
308 /*
309 * now we have both trace and payload, get a lock on the
310 * output descriptor and send it off
311 */
d5396421
JA
312 tip_fd_lock(tip);
313
314 ret = write(tip->ofd, &t, sizeof(t));
d0ca268b 315 if (ret < 0) {
d5396421
JA
316 fprintf(stderr,"Thread %d failed write\n", tip->cpu);
317 tip_fd_unlock(tip);
76718bcd 318 exit_trace(1);
d0ca268b
JA
319 }
320
69e65a9e
JA
321 if (pdu_data) {
322 ret = write(tip->ofd, pdu_data, pdu_len);
323 if (ret != pdu_len) {
324 perror("write pdu data");
325 exit_trace(1);
326 }
d5396421 327
69e65a9e
JA
328 free(pdu_data);
329 pdu_data = NULL;
330 }
87b72777 331
69e65a9e 332 tip_fd_unlock(tip);
d0ca268b
JA
333 tip->events_processed++;
334 }
335
336 return NULL;
337}
338
e7c9f3ff 339static int start_threads(struct device_information *dip)
d0ca268b
JA
340{
341 struct thread_information *tip;
d5396421 342 char op[64];
e7c9f3ff 343 int j, pipeline = output_name && !strcmp(output_name, "-");
d0ca268b 344
e7c9f3ff
NS
345 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
346 tip->cpu = j;
347 tip->device = dip;
d5396421 348 tip->fd_lock = NULL;
d0ca268b
JA
349 tip->events_processed = 0;
350
e7c9f3ff 351 if (pipeline) {
1f79c4a0 352 tip->ofd = dup(STDOUT_FILENO);
d5396421
JA
353 tip->fd_lock = &stdout_mutex;
354 } else {
e7c9f3ff
NS
355 if (output_name)
356 sprintf(op, "%s_%s_out.%d", output_name,
357 dip->buts_name, tip->cpu);
358 else
359 sprintf(op, "%s_out.%d",
360 dip->buts_name, tip->cpu);
d5396421
JA
361 tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
362 }
363
364 if (tip->ofd < 0) {
365 perror(op);
e7c9f3ff 366 return 1;
d5396421
JA
367 }
368
d0ca268b 369 if (pthread_create(&tip->thread, NULL, extract, tip)) {
e7c9f3ff
NS
370 perror("pthread_create");
371 close(tip->ofd);
372 return 1;
d0ca268b
JA
373 }
374 }
375
e7c9f3ff 376 return 0;
d0ca268b
JA
377}
378
72ca8801
NS
379static void close_thread(struct thread_information *tip)
380{
381 if (tip->fd != -1)
382 close(tip->fd);
383 if (tip->ofd != -1)
384 close(tip->ofd);
385 tip->fd = tip->ofd = -1;
386}
387
e7c9f3ff 388static void stop_threads(struct device_information *dip)
3aabcd89 389{
e7c9f3ff
NS
390 struct thread_information *tip;
391 long ret;
392 int j;
3aabcd89 393
e7c9f3ff 394 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
3aabcd89
JA
395 if (pthread_join(tip->thread, (void *) &ret))
396 perror("thread_join");
72ca8801 397 close_thread(tip);
3aabcd89
JA
398 }
399}
400
e7c9f3ff 401static void stop_all_threads(void)
72ca8801 402{
e7c9f3ff 403 struct device_information *dip;
72ca8801
NS
404 int i;
405
e7c9f3ff
NS
406 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
407 stop_threads(dip);
408}
409
410static void stop_all_tracing(void)
411{
412 struct device_information *dip;
413 struct thread_information *tip;
414 int i, j;
415
416 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
417 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
418 close_thread(tip);
419 stop_trace(dip);
420 }
72ca8801
NS
421}
422
/*
 * Stop all tracing and terminate the process with the given exit
 * status. Does not return.
 */
static void exit_trace(int status)
{
	stop_all_tracing();

	exit(status);
}
428
e7c9f3ff
NS
429static int resize_devices(char *path)
430{
431 int size = (ndevs + 1) * sizeof(struct device_information);
432
433 device_information = realloc(device_information, size);
434 if (!device_information) {
435 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
436 return 1;
437 }
438 device_information[ndevs].path = path;
439 ndevs++;
440 return 0;
441}
442
443static int open_devices(void)
d0ca268b 444{
e7c9f3ff 445 struct device_information *dip;
d0ca268b 446 int i;
d0ca268b 447
e7c9f3ff
NS
448 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
449 dip->fd = open(dip->path, O_RDONLY);
450 if (dip->fd < 0) {
451 perror(dip->path);
452 return 1;
453 }
454 }
455 return 0;
456}
457
458static int start_devices(void)
459{
460 struct device_information *dip;
461 int i, j, size;
462
463 size = ncpus * sizeof(struct thread_information);
464 thread_information = malloc(size * ndevs);
465 if (!thread_information) {
466 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
467 return 1;
468 }
d5396421 469
e7c9f3ff
NS
470 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
471 if (start_trace(dip)) {
472 close(dip->fd);
473 fprintf(stderr, "Failed to start trace on %s\n",
474 dip->path);
475 break;
476 }
477 }
478 if (i != ndevs) {
479 for (dip = device_information, j = 0; j < i; j++, dip++)
480 stop_trace(dip);
481 return 1;
482 }
483
484 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
485 dip->threads = thread_information + (i * ncpus);
486 if (start_threads(dip)) {
487 fprintf(stderr, "Failed to start worker threads\n");
488 break;
489 }
490 }
491 if (i != ndevs) {
492 for (dip = device_information, j = 0; j < i; j++, dip++)
493 stop_threads(dip);
494 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
495 stop_trace(dip);
496 return 1;
d0ca268b
JA
497 }
498
e7c9f3ff 499 return 0;
d0ca268b
JA
500}
501
e7c9f3ff
NS
502static void show_stats(void)
503{
504 int i, j;
505 struct device_information *dip;
506 struct thread_information *tip;
507 unsigned long long events_processed;
508
509 if (output_name && !strcmp(output_name, "-"))
510 return;
511
512 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
513 printf("Device: %s\n", dip->path);
514 events_processed = 0;
515 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
516 printf(" CPU%3d: %20ld events\n",
517 tip->cpu, tip->events_processed);
518 events_processed += tip->events_processed;
519 }
520 printf(" Total: %20lld events\n", events_processed);
521 }
522}
523
ee1f4158
NS
/* Write a one-line usage summary for program to stderr. */
static void show_usage(char *program)
{
	fprintf(stderr,
		"Usage: %s [-d <dev>] [-a <trace> [-a <trace>]] <dev>\n",
		program);
}
530
1f79c4a0 531static void handle_sigint(int sig)
d0ca268b 532{
d0ca268b
JA
533 done = 1;
534}
535
536int main(int argc, char *argv[])
537{
5270dddd 538 static char default_relay_path[] = "/relay";
e3e74029 539 struct statfs st;
d39c04ca 540 int i, c;
ece238a6 541 int stop_watch = 0;
d39c04ca
AB
542 int act_mask_tmp = 0;
543
544 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
545 switch (c) {
546 case 'a':
547 i = find_mask_map(optarg);
548 if (i < 0) {
549 fprintf(stderr,"Invalid action mask %s\n",
550 optarg);
7425d456 551 return 1;
d39c04ca
AB
552 }
553 act_mask_tmp |= i;
554 break;
555
556 case 'A':
557 if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
558 fprintf(stderr,
559 "Invalid set action mask %s/0x%x\n",
560 optarg, i);
7425d456 561 return 1;
d39c04ca
AB
562 }
563 act_mask_tmp = i;
564 break;
d0ca268b 565
d39c04ca 566 case 'd':
e7c9f3ff
NS
567 if (resize_devices(optarg) != 0)
568 return 1;
d39c04ca
AB
569 break;
570
5270dddd
JA
571 case 'r':
572 relay_path = optarg;
573 break;
574
d5396421 575 case 'o':
66efebf8 576 output_name = optarg;
d5396421 577 break;
bc39777c
JA
578 case 'k':
579 kill_running_trace = 1;
580 break;
ece238a6
NS
581 case 'w':
582 stop_watch = atoi(optarg);
583 if (stop_watch <= 0) {
584 fprintf(stderr,
585 "Invalid stopwatch value (%d secs)\n",
586 stop_watch);
587 return 1;
588 }
589 break;
d5396421 590
d39c04ca 591 default:
ee1f4158 592 show_usage(argv[0]);
7425d456 593 return 1;
d39c04ca
AB
594 }
595 }
596
e7c9f3ff
NS
597 while (optind < argc) {
598 if (resize_devices(argv[optind++]) != 0)
599 return 1;
600 }
ee1f4158 601
e7c9f3ff 602 if (ndevs == 0) {
ee1f4158 603 show_usage(argv[0]);
7425d456 604 return 1;
d39c04ca
AB
605 }
606
5270dddd
JA
607 if (!relay_path)
608 relay_path = default_relay_path;
609
d5396421 610 if (act_mask_tmp != 0)
d39c04ca 611 act_mask = act_mask_tmp;
d0ca268b 612
e3e74029
NS
613 if (statfs(relay_path, &st) < 0) {
614 perror("statfs");
615 fprintf(stderr,"%s does not appear to be a valid path\n",
616 relay_path);
617 return 1;
618 } else if (st.f_type != RELAYFS_TYPE) {
619 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
d0ca268b 620 relay_path);
7425d456 621 return 1;
d0ca268b
JA
622 }
623
e7c9f3ff 624 if (open_devices() != 0)
7425d456 625 return 1;
bc39777c
JA
626
627 if (kill_running_trace) {
e7c9f3ff 628 stop_all_traces();
7425d456 629 return 0;
bc39777c
JA
630 }
631
d0ca268b
JA
632 setlocale(LC_NUMERIC, "en_US");
633
e7c9f3ff
NS
634 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
635 if (ncpus < 0) {
636 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
7425d456 637 return 1;
d0ca268b
JA
638 }
639
e7c9f3ff
NS
640 if (start_devices() != 0)
641 return 1;
642
d0ca268b
JA
643 signal(SIGINT, handle_sigint);
644 signal(SIGHUP, handle_sigint);
645 signal(SIGTERM, handle_sigint);
ece238a6 646 signal(SIGALRM, handle_sigint);
d0ca268b 647
e7c9f3ff 648 atexit(stop_all_tracing);
830fd65c 649
ece238a6
NS
650 if (stop_watch)
651 alarm(stop_watch);
652
d0ca268b
JA
653 while (!is_done())
654 sleep(1);
655
e7c9f3ff
NS
656 stop_all_threads();
657 stop_all_traces();
d0ca268b
JA
658 show_stats();
659
660 return 0;
661}
662