[PATCH] blktrace.tex: add description of each possible action
[blktrace.git] / blktrace.c
... / ...
CommitLineData
1/*
2 * block queue tracing application
3 *
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 *
20 */
21#include <pthread.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <unistd.h>
25#include <locale.h>
26#include <signal.h>
27#include <fcntl.h>
28#include <string.h>
29#include <sys/ioctl.h>
30#include <sys/param.h>
31#include <sys/statfs.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <sched.h>
35#include <ctype.h>
36#include <getopt.h>
37
38#include "blktrace.h"
39
/* Version string printed by -v and in the usage banner. */
static char blktrace_version[] = "0.90";

/* Defaults for the sub-buffer size (bytes) and count passed at trace setup. */
#define BUF_SIZE	(128 * 1024)
#define BUF_NR		(4)

/* f_type value that statfs() must report for the relay mount point. */
#define RELAYFS_TYPE	0xF0B4A981

/* Short option string handed to getopt_long(); mirrors l_opts. */
#define S_OPTS	"d:a:A:r:o:kw:vb:n:D:"
/*
 * Long option table for getopt_long(). Columns are
 * { name, has_arg, flag, val }; val is the matching short option
 * character. The all-zero entry terminates the table.
 */
static struct option l_opts[] = {
	{ "dev",		required_argument,	NULL,	'd' },
	{ "act-mask",		required_argument,	NULL,	'a' },
	{ "set-mask",		required_argument,	NULL,	'A' },
	{ "relay",		required_argument,	NULL,	'r' },
	{ "output",		required_argument,	NULL,	'o' },
	{ "kill",		no_argument,		NULL,	'k' },
	{ "stopwatch",		required_argument,	NULL,	'w' },
	{ "version",		no_argument,		NULL,	'v' },
	{ "buffer-size",	required_argument,	NULL,	'b' },
	{ "num-sub-buffers",	required_argument,	NULL,	'n' },
	{ "output-dir",		required_argument,	NULL,	'D' },
	{ NULL,			0,			NULL,	0   }
};
119
/*
 * Per-CPU, per-device worker state. One of these is handed to each
 * extract() thread.
 */
struct thread_information {
	int cpu;			/* CPU index this worker serves */
	pthread_t thread;		/* handle used to join the worker */

	int fd;				/* input: per-cpu trace file under the relay path */
	char fn[MAXPATHLEN + 64];	/* path of that trace file */
	void *buf;
	unsigned long buf_offset;
	unsigned int buf_subbuf;
	unsigned int sequence;

	pthread_mutex_t *fd_lock;	/* non-NULL when ofd is shared (stdout) */
	int ofd;			/* output descriptor events are written to */

	unsigned long events_processed;	/* events written out so far */
	struct device_information *device;	/* owning device */
};
137
/*
 * State kept for every block device named on the command line.
 */
struct device_information {
	int fd;				/* descriptor of the opened device node */
	char *path;			/* device path as given by the user */
	char buts_name[32];		/* name copied from the trace setup ioctl */
	int trace_started;		/* non-zero once start_trace() succeeded */
	struct thread_information *threads;	/* first of ncpus worker slots */
};
145
146static int ncpus;
147static struct thread_information *thread_information;
148static int ndevs;
149static struct device_information *device_information;
150
151/* command line option globals */
152static char *relay_path;
153static char *output_name;
154static char *output_dir;
155static int act_mask = ~0U;
156static int kill_running_trace;
157static unsigned int buf_size = BUF_SIZE;
158static unsigned int buf_nr = BUF_NR;
159
160#define is_done() (*(volatile int *)(&done))
161static volatile int done;
162
163static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
164
165static void exit_trace(int status);
166
167static int start_trace(struct device_information *dip)
168{
169 struct blk_user_trace_setup buts;
170
171 memset(&buts, 0, sizeof(buts));
172 buts.buf_size = buf_size;
173 buts.buf_nr = buf_nr;
174 buts.act_mask = act_mask;
175
176 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
177 perror("BLKSTARTTRACE");
178 return 1;
179 }
180
181 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
182 dip->trace_started = 1;
183 return 0;
184}
185
186static void stop_trace(struct device_information *dip)
187{
188 if (dip->trace_started || kill_running_trace) {
189 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
190 perror("BLKSTOPTRACE");
191 close(dip->fd);
192 dip->trace_started = 0;
193 }
194}
195
196static void stop_all_traces(void)
197{
198 struct device_information *dip;
199 int i;
200
201 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
202 stop_trace(dip);
203}
204
205static int read_data(struct thread_information *tip, void *buf, int len)
206{
207 char *p = buf;
208 int ret, bytes_left = len;
209
210 while (!is_done() && bytes_left > 0) {
211 ret = read(tip->fd, p, bytes_left);
212 if (ret == bytes_left)
213 return 0;
214
215 if (ret < 0) {
216 perror(tip->fn);
217 fprintf(stderr,"Thread %d failed read of %s\n",
218 tip->cpu, tip->fn);
219 break;
220 } else if (ret > 0) {
221 p += ret;
222 bytes_left -= ret;
223 } else
224 usleep(1000);
225 }
226
227 return -1;
228}
229
/*
 * Write all 'buf_len' bytes of 'buf' to 'fd', continuing after short
 * writes.
 *
 * Returns 0 when everything was written (including the zero-length
 * case), 1 if write() fails or reports that it wrote nothing.
 */
static int write_data(int fd, void *buf, unsigned int buf_len)
{
	char *src = buf;
	int remaining = buf_len;

	while (remaining > 0) {
		int put = write(fd, src, remaining);

		if (put < 0) {
			perror("write");
			return 1;
		}
		if (put == 0) {
			fprintf(stderr, "Zero write?\n");
			return 1;
		}

		src += put;
		remaining -= put;
	}

	return 0;
}
255
/*
 * Allocate a buffer of 'nb' bytes and fill it from the worker's input
 * descriptor.
 *
 * Returns the buffer, which the caller must free(), or NULL if the
 * allocation fails or the data could not be read in full.
 */
static void *extract_data(struct thread_information *tip, int nb)
{
	unsigned char *buf;

	buf = malloc(nb);
	if (!buf) {
		/* don't hand a NULL destination to read() */
		fprintf(stderr, "Out of memory, payload (%d)\n", nb);
		return NULL;
	}

	if (!read_data(tip, buf, nb))
		return buf;

	free(buf);
	return NULL;
}
267
268/*
269 * trace may start inside 'bit' or may need to be gotten further on
270 */
271static int get_event_slow(struct thread_information *tip,
272 struct blk_io_trace *bit)
273{
274 const int inc = sizeof(__u32);
275 struct blk_io_trace foo;
276 unsigned int offset;
277 void *p;
278
279 /*
280 * check is trace is inside
281 */
282 offset = 0;
283 p = bit;
284 while (offset < sizeof(*bit)) {
285 p += inc;
286 offset += inc;
287
288 memcpy(&foo, p, inc);
289
290 if (CHECK_MAGIC(&foo))
291 break;
292 }
293
294 /*
295 * part trace found inside, read the rest
296 */
297 if (offset < sizeof(*bit)) {
298 int good_bytes = sizeof(*bit) - offset;
299
300 memmove(bit, p, good_bytes);
301 p = (void *) bit + good_bytes;
302
303 return read_data(tip, p, offset);
304 }
305
306 /*
307 * nothing found, keep looking for start of trace
308 */
309 do {
310 if (read_data(tip, bit, sizeof(bit->magic)))
311 return -1;
312 } while (!CHECK_MAGIC(bit));
313
314 /*
315 * now get the rest of it
316 */
317 p = &bit->sequence;
318 if (!read_data(tip, p, sizeof(*bit) - inc))
319 return -1;
320
321 return 0;
322}
323
324/*
325 * Sometimes relayfs screws us a little, if an event crosses a sub buffer
326 * boundary. So keep looking forward in the trace data until an event
327 * is found
328 */
329static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
330{
331 /*
332 * optimize for the common fast case, a full trace read that
333 * succeeds
334 */
335 if (read_data(tip, bit, sizeof(*bit)))
336 return -1;
337
338 if (CHECK_MAGIC(bit))
339 return 0;
340
341 /*
342 * ok that didn't work, the event may start somewhere inside the
343 * trace itself
344 */
345 return get_event_slow(tip, bit);
346}
347
348static inline void tip_fd_unlock(struct thread_information *tip)
349{
350 if (tip->fd_lock)
351 pthread_mutex_unlock(tip->fd_lock);
352}
353
354static inline void tip_fd_lock(struct thread_information *tip)
355{
356 if (tip->fd_lock)
357 pthread_mutex_lock(tip->fd_lock);
358}
359
360static void *extract(void *arg)
361{
362 struct thread_information *tip = arg;
363 int pdu_len;
364 char *pdu_data;
365 struct blk_io_trace t;
366 pid_t pid = getpid();
367 cpu_set_t cpu_mask;
368
369 CPU_ZERO(&cpu_mask);
370 CPU_SET((tip->cpu), &cpu_mask);
371
372 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
373 perror("sched_setaffinity");
374 exit_trace(1);
375 }
376
377 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
378 relay_path, tip->device->buts_name, tip->cpu);
379 tip->fd = open(tip->fn, O_RDONLY);
380 if (tip->fd < 0) {
381 perror(tip->fn);
382 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
383 tip->fn);
384 exit_trace(1);
385 }
386
387 pdu_data = NULL;
388 while (!is_done()) {
389 if (get_event(tip, &t))
390 break;
391
392 if (verify_trace(&t))
393 break;
394
395 pdu_len = t.pdu_len;
396
397 trace_to_be(&t);
398
399 if (pdu_len) {
400 pdu_data = extract_data(tip, pdu_len);
401 if (!pdu_data)
402 break;
403 }
404
405 /*
406 * now we have both trace and payload, get a lock on the
407 * output descriptor and send it off
408 */
409 tip_fd_lock(tip);
410
411 if (write_data(tip->ofd, &t, sizeof(t))) {
412 tip_fd_unlock(tip);
413 break;
414 }
415
416 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
417 tip_fd_unlock(tip);
418 break;
419 }
420
421 tip_fd_unlock(tip);
422
423 if (pdu_data) {
424 free(pdu_data);
425 pdu_data = NULL;
426 }
427
428 tip->events_processed++;
429 }
430
431 exit_trace(1);
432 return NULL;
433}
434
435static int start_threads(struct device_information *dip)
436{
437 struct thread_information *tip;
438 char op[64];
439 int j, pipeline = output_name && !strcmp(output_name, "-");
440 int len;
441
442 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
443 tip->cpu = j;
444 tip->device = dip;
445 tip->fd_lock = NULL;
446 tip->events_processed = 0;
447
448 if (pipeline) {
449 tip->ofd = dup(STDOUT_FILENO);
450 tip->fd_lock = &stdout_mutex;
451 } else {
452 len = 0;
453
454 if (output_dir)
455 len = sprintf(op, "%s/", output_dir);
456
457 if (output_name) {
458 sprintf(op + len, "%s.blktrace.%d", output_name,
459 tip->cpu);
460 } else {
461 sprintf(op + len, "%s.blktrace.%d",
462 dip->buts_name, tip->cpu);
463 }
464 tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
465 }
466
467 if (tip->ofd < 0) {
468 perror(op);
469 return 1;
470 }
471
472 if (pthread_create(&tip->thread, NULL, extract, tip)) {
473 perror("pthread_create");
474 close(tip->ofd);
475 return 1;
476 }
477 }
478
479 return 0;
480}
481
482static void close_thread(struct thread_information *tip)
483{
484 if (tip->fd != -1)
485 close(tip->fd);
486 if (tip->ofd != -1)
487 close(tip->ofd);
488
489 tip->fd = tip->ofd = -1;
490}
491
492static void stop_threads(struct device_information *dip)
493{
494 struct thread_information *tip;
495 long ret;
496 int j;
497
498 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
499 if (pthread_join(tip->thread, (void *) &ret))
500 perror("thread_join");
501 close_thread(tip);
502 }
503}
504
505static void stop_all_threads(void)
506{
507 struct device_information *dip;
508 int i;
509
510 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
511 stop_threads(dip);
512}
513
514static void stop_all_tracing(void)
515{
516 struct device_information *dip;
517 struct thread_information *tip;
518 int i, j;
519
520 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
521 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
522 close_thread(tip);
523 stop_trace(dip);
524 }
525}
526
/*
 * Shut down all tracing, then terminate the process with 'status'.
 * Does not return.
 */
static void exit_trace(int status)
{
	/* close worker descriptors and stop the traces first */
	stop_all_tracing();

	exit(status);
}
532
533static int resize_devices(char *path)
534{
535 int size = (ndevs + 1) * sizeof(struct device_information);
536
537 device_information = realloc(device_information, size);
538 if (!device_information) {
539 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
540 return 1;
541 }
542 device_information[ndevs].path = path;
543 ndevs++;
544 return 0;
545}
546
547static int open_devices(void)
548{
549 struct device_information *dip;
550 int i;
551
552 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
553 dip->fd = open(dip->path, O_RDONLY);
554 if (dip->fd < 0) {
555 perror(dip->path);
556 return 1;
557 }
558 }
559 return 0;
560}
561
562static int start_devices(void)
563{
564 struct device_information *dip;
565 int i, j, size;
566
567 size = ncpus * sizeof(struct thread_information);
568 thread_information = malloc(size * ndevs);
569 if (!thread_information) {
570 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
571 return 1;
572 }
573
574 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
575 if (start_trace(dip)) {
576 close(dip->fd);
577 fprintf(stderr, "Failed to start trace on %s\n",
578 dip->path);
579 break;
580 }
581 }
582 if (i != ndevs) {
583 for (dip = device_information, j = 0; j < i; j++, dip++)
584 stop_trace(dip);
585 return 1;
586 }
587
588 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
589 dip->threads = thread_information + (i * ncpus);
590 if (start_threads(dip)) {
591 fprintf(stderr, "Failed to start worker threads\n");
592 break;
593 }
594 }
595 if (i != ndevs) {
596 for (dip = device_information, j = 0; j < i; j++, dip++)
597 stop_threads(dip);
598 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
599 stop_trace(dip);
600 return 1;
601 }
602
603 return 0;
604}
605
606static void show_stats(void)
607{
608 int i, j;
609 struct device_information *dip;
610 struct thread_information *tip;
611 unsigned long long events_processed;
612
613 if (output_name && !strcmp(output_name, "-"))
614 return;
615
616 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
617 printf("Device: %s\n", dip->path);
618 events_processed = 0;
619 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
620 printf(" CPU%3d: %20ld events\n",
621 tip->cpu, tip->events_processed);
622 events_processed += tip->events_processed;
623 }
624 printf(" Total: %20lld events\n", events_processed);
625 }
626}
627
/*
 * Option summary printed by show_usage() after the program name and
 * version. Adjacent string literals are concatenated by the compiler.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -v ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-v Print program version info\n\n";
642
643static void show_usage(char *program)
644{
645 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
646}
647
648static void handle_sigint(__attribute__((__unused__)) int sig)
649{
650 done = 1;
651}
652
653int main(int argc, char *argv[])
654{
655 static char default_relay_path[] = "/relay";
656 struct statfs st;
657 int i, c;
658 int stop_watch = 0;
659 int act_mask_tmp = 0;
660
661 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
662 switch (c) {
663 case 'a':
664 i = find_mask_map(optarg);
665 if (i < 0) {
666 fprintf(stderr,"Invalid action mask %s\n",
667 optarg);
668 return 1;
669 }
670 act_mask_tmp |= i;
671 break;
672
673 case 'A':
674 if ((sscanf(optarg, "%x", &i) != 1) ||
675 !valid_act_opt(i)) {
676 fprintf(stderr,
677 "Invalid set action mask %s/0x%x\n",
678 optarg, i);
679 return 1;
680 }
681 act_mask_tmp = i;
682 break;
683
684 case 'd':
685 if (resize_devices(optarg) != 0)
686 return 1;
687 break;
688
689 case 'r':
690 relay_path = optarg;
691 break;
692
693 case 'o':
694 output_name = optarg;
695 break;
696 case 'k':
697 kill_running_trace = 1;
698 break;
699 case 'w':
700 stop_watch = atoi(optarg);
701 if (stop_watch <= 0) {
702 fprintf(stderr,
703 "Invalid stopwatch value (%d secs)\n",
704 stop_watch);
705 return 1;
706 }
707 break;
708 case 'v':
709 printf("%s version %s\n", argv[0], blktrace_version);
710 return 0;
711 case 'b':
712 buf_size = atoi(optarg);
713 if (buf_size <= 0 || buf_size > 16*1024) {
714 fprintf(stderr,
715 "Invalid buffer size (%d)\n", buf_size);
716 return 1;
717 }
718 buf_size <<= 10;
719 break;
720 case 'n':
721 buf_nr = atoi(optarg);
722 if (buf_nr <= 0) {
723 fprintf(stderr,
724 "Invalid buffer nr (%d)\n", buf_nr);
725 return 1;
726 }
727 break;
728 case 'D':
729 output_dir = optarg;
730 break;
731 default:
732 show_usage(argv[0]);
733 return 1;
734 }
735 }
736
737 while (optind < argc) {
738 if (resize_devices(argv[optind++]) != 0)
739 return 1;
740 }
741
742 if (ndevs == 0) {
743 show_usage(argv[0]);
744 return 1;
745 }
746
747 if (!relay_path)
748 relay_path = default_relay_path;
749
750 if (act_mask_tmp != 0)
751 act_mask = act_mask_tmp;
752
753 if (statfs(relay_path, &st) < 0) {
754 perror("statfs");
755 fprintf(stderr,"%s does not appear to be a valid path\n",
756 relay_path);
757 return 1;
758 } else if (st.f_type != RELAYFS_TYPE) {
759 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
760 relay_path);
761 return 1;
762 }
763
764 if (open_devices() != 0)
765 return 1;
766
767 if (kill_running_trace) {
768 stop_all_traces();
769 return 0;
770 }
771
772 setlocale(LC_NUMERIC, "en_US");
773
774 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
775 if (ncpus < 0) {
776 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
777 return 1;
778 }
779
780 if (start_devices() != 0)
781 return 1;
782
783 signal(SIGINT, handle_sigint);
784 signal(SIGHUP, handle_sigint);
785 signal(SIGTERM, handle_sigint);
786 signal(SIGALRM, handle_sigint);
787
788 atexit(stop_all_tracing);
789
790 if (stop_watch)
791 alarm(stop_watch);
792
793 while (!is_done())
794 sleep(1);
795
796 stop_all_threads();
797 stop_all_traces();
798 show_stats();
799
800 return 0;
801}
802