2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
40 static char blktrace_version[] = "0.90";
42 #define BUF_SIZE (128 *1024)
45 #define RELAYFS_TYPE 0xF0B4A981
47 #define DECLARE_MASK_MAP(mask) { BLK_TC_##mask, #mask, "BLK_TC_"#mask }
48 #define COMPARE_MASK_MAP(mmp, str) \
49 (!strcasecmp((mmp)->short_form, (str)) || \
50 !strcasecmp((mmp)->long_form, (str)))
52 #define VALID_SET(x) ((1 <= (x)) && ((x) < (1 << BLK_TC_SHIFT)))
60 static struct mask_map mask_maps[] = {
61 DECLARE_MASK_MAP(READ),
62 DECLARE_MASK_MAP(WRITE),
63 DECLARE_MASK_MAP(BARRIER),
64 DECLARE_MASK_MAP(SYNC),
65 DECLARE_MASK_MAP(QUEUE),
66 DECLARE_MASK_MAP(REQUEUE),
67 DECLARE_MASK_MAP(ISSUE),
68 DECLARE_MASK_MAP(COMPLETE),
73 #define S_OPTS "d:a:A:r:o:kw:vb:n:D:"
74 static struct option l_opts[] = {
77 .has_arg = required_argument,
83 .has_arg = required_argument,
89 .has_arg = required_argument,
95 .has_arg = required_argument,
101 .has_arg = required_argument,
107 .has_arg = no_argument,
113 .has_arg = required_argument,
119 .has_arg = no_argument,
124 .name = "buffer size (in KiB)",
125 .has_arg = required_argument,
130 .name = "nr of sub buffers",
131 .has_arg = required_argument,
136 .name = "output directory",
137 .has_arg = required_argument,
143 struct thread_information {
148 char fn[MAXPATHLEN + 64];
150 unsigned long buf_offset;
151 unsigned int buf_subbuf;
152 unsigned int sequence;
154 pthread_mutex_t *fd_lock;
157 unsigned long events_processed;
158 struct device_information *device;
161 struct device_information {
166 struct thread_information *threads;
170 static struct thread_information *thread_information;
172 static struct device_information *device_information;
174 /* command line option globals */
175 static char *relay_path;
176 static char *output_name;
177 static char *output_dir;
178 static int act_mask = ~0U;
179 static int kill_running_trace;
180 static unsigned int buf_size = BUF_SIZE;
181 static unsigned int buf_nr = BUF_NR;
183 #define is_done() (*(volatile int *)(&done))
184 static volatile int done;
186 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
188 static void exit_trace(int status);
190 static int find_mask_map(char *string)
194 for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
195 if (COMPARE_MASK_MAP(&mask_maps[i], string))
196 return mask_maps[i].mask;
201 static int start_trace(struct device_information *dip)
203 struct blk_user_trace_setup buts;
205 memset(&buts, 0, sizeof(buts));
206 buts.buf_size = buf_size;
207 buts.buf_nr = buf_nr;
208 buts.act_mask = act_mask;
210 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
211 perror("BLKSTARTTRACE");
215 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
216 dip->trace_started = 1;
220 static void stop_trace(struct device_information *dip)
222 if (dip->trace_started || kill_running_trace) {
223 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
224 perror("BLKSTOPTRACE");
226 dip->trace_started = 0;
230 static void stop_all_traces(void)
232 struct device_information *dip;
235 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
239 static int read_data(struct thread_information *tip, void *buf, int len)
242 int ret, bytes_left = len;
244 while (!is_done() && bytes_left > 0) {
245 ret = read(tip->fd, p, bytes_left);
246 if (ret == bytes_left)
251 fprintf(stderr,"Thread %d failed read of %s\n",
254 } else if (ret > 0) {
264 static int write_data(int fd, void *buf, unsigned int buf_len)
269 bytes_left = buf_len;
270 while (bytes_left > 0) {
271 ret = write(fd, p, bytes_left);
272 if (ret == bytes_left)
278 } else if (ret > 0) {
282 fprintf(stderr, "Zero write?\n");
290 static void *extract_data(struct thread_information *tip, int nb)
295 if (!read_data(tip, buf, nb))
303 * trace may start inside 'bit' or may need to be gotten further on
305 static int get_event_slow(struct thread_information *tip,
306 struct blk_io_trace *bit)
308 const int inc = sizeof(__u32);
309 struct blk_io_trace foo;
314 * check if trace is inside
318 while (offset < sizeof(*bit)) {
322 memcpy(&foo, p, inc);
324 if (CHECK_MAGIC(&foo))
329 * partial trace found inside, read the rest
331 if (offset < sizeof(*bit)) {
332 int good_bytes = sizeof(*bit) - offset;
334 memmove(bit, p, good_bytes);
335 p = (void *) bit + good_bytes;
337 return read_data(tip, p, offset);
341 * nothing found, keep looking for start of trace
344 if (read_data(tip, bit, sizeof(bit->magic)))
346 } while (!CHECK_MAGIC(bit));
349 * now get the rest of it
352 if (!read_data(tip, p, sizeof(*bit) - inc))
359 * Sometimes relayfs screws us a little, if an event crosses a sub buffer
360 * boundary. So keep looking forward in the trace data until an event
363 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
366 * optimize for the common fast case, a full trace read that
369 if (read_data(tip, bit, sizeof(*bit)))
372 if (CHECK_MAGIC(bit))
376 * ok that didn't work, the event may start somewhere inside the
379 return get_event_slow(tip, bit);
382 static inline void tip_fd_unlock(struct thread_information *tip)
385 pthread_mutex_unlock(tip->fd_lock);
388 static inline void tip_fd_lock(struct thread_information *tip)
391 pthread_mutex_lock(tip->fd_lock);
394 static void *extract(void *arg)
396 struct thread_information *tip = arg;
399 struct blk_io_trace t;
400 pid_t pid = getpid();
404 CPU_SET((tip->cpu), &cpu_mask);
406 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
407 perror("sched_setaffinity");
411 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
412 relay_path, tip->device->buts_name, tip->cpu);
413 tip->fd = open(tip->fn, O_RDONLY);
416 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
423 if (get_event(tip, &t))
426 if (verify_trace(&t))
434 pdu_data = extract_data(tip, pdu_len);
440 * now we have both trace and payload, get a lock on the
441 * output descriptor and send it off
445 if (write_data(tip->ofd, &t, sizeof(t))) {
450 if (pdu_data && write_data(tip->ofd, pdu_data, pdu_len)) {
462 tip->events_processed++;
469 static int start_threads(struct device_information *dip)
471 struct thread_information *tip;
473 int j, pipeline = output_name && !strcmp(output_name, "-");
476 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
480 tip->events_processed = 0;
483 tip->ofd = dup(STDOUT_FILENO);
484 tip->fd_lock = &stdout_mutex;
489 len = sprintf(op, "%s/", output_dir);
492 sprintf(op + len, "%s.blktrace.%d", output_name,
495 sprintf(op + len, "%s.blktrace.%d",
496 dip->buts_name, tip->cpu);
498 tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
506 if (pthread_create(&tip->thread, NULL, extract, tip)) {
507 perror("pthread_create");
516 static void close_thread(struct thread_information *tip)
523 tip->fd = tip->ofd = -1;
526 static void stop_threads(struct device_information *dip)
528 struct thread_information *tip;
532 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
533 if (pthread_join(tip->thread, (void *) &ret))
534 perror("thread_join");
539 static void stop_all_threads(void)
541 struct device_information *dip;
544 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
548 static void stop_all_tracing(void)
550 struct device_information *dip;
551 struct thread_information *tip;
554 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
555 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
561 static void exit_trace(int status)
567 static int resize_devices(char *path)
569 int size = (ndevs + 1) * sizeof(struct device_information);
571 device_information = realloc(device_information, size);
572 if (!device_information) {
573 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
576 device_information[ndevs].path = path;
581 static int open_devices(void)
583 struct device_information *dip;
586 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
587 dip->fd = open(dip->path, O_RDONLY);
596 static int start_devices(void)
598 struct device_information *dip;
601 size = ncpus * sizeof(struct thread_information);
602 thread_information = malloc(size * ndevs);
603 if (!thread_information) {
604 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
608 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
609 if (start_trace(dip)) {
611 fprintf(stderr, "Failed to start trace on %s\n",
617 for (dip = device_information, j = 0; j < i; j++, dip++)
622 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
623 dip->threads = thread_information + (i * ncpus);
624 if (start_threads(dip)) {
625 fprintf(stderr, "Failed to start worker threads\n");
630 for (dip = device_information, j = 0; j < i; j++, dip++)
632 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
640 static void show_stats(void)
643 struct device_information *dip;
644 struct thread_information *tip;
645 unsigned long long events_processed;
647 if (output_name && !strcmp(output_name, "-"))
650 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
651 printf("Device: %s\n", dip->path);
652 events_processed = 0;
653 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
654 printf(" CPU%3d: %20ld events\n",
655 tip->cpu, tip->events_processed);
656 events_processed += tip->events_processed;
658 printf(" Total: %20lld events\n", events_processed);
662 static char usage_str[] = \
663 "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
664 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
665 "\t-d Use specified device. May also be given last after options\n" \
666 "\t-r Path to mounted relayfs, defaults to /relay\n" \
667 "\t-o File(s) to send output to\n" \
668 "\t-D Directory to prepend to output file names\n" \
669 "\t-k Kill a running trace\n" \
670 "\t-w Stop after defined time, in seconds\n" \
671 "\t-a Only trace specified actions. See documentation\n" \
672 "\t-A Give trace mask as a single value. See documentation\n" \
673 "\t-b Sub buffer size in KiB\n" \
674 "\t-n Number of sub buffers\n" \
675 "\t-v Print program version info\n\n";
677 static void show_usage(char *program)
679 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
682 static void handle_sigint(__attribute__((__unused__)) int sig)
687 int main(int argc, char *argv[])
689 static char default_relay_path[] = "/relay";
693 int act_mask_tmp = 0;
695 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
698 i = find_mask_map(optarg);
700 fprintf(stderr,"Invalid action mask %s\n",
708 if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
710 "Invalid set action mask %s/0x%x\n",
718 if (resize_devices(optarg) != 0)
727 output_name = optarg;
730 kill_running_trace = 1;
733 stop_watch = atoi(optarg);
734 if (stop_watch <= 0) {
736 "Invalid stopwatch value (%d secs)\n",
742 printf("%s version %s\n", argv[0], blktrace_version);
745 buf_size = atoi(optarg);
748 "Invalid buffer size (%d)\n", buf_size);
754 buf_nr = atoi(optarg);
757 "Invalid buffer nr (%d)\n", buf_nr);
770 while (optind < argc) {
771 if (resize_devices(argv[optind++]) != 0)
781 relay_path = default_relay_path;
783 if (act_mask_tmp != 0)
784 act_mask = act_mask_tmp;
786 if (statfs(relay_path, &st) < 0) {
788 fprintf(stderr,"%s does not appear to be a valid path\n",
791 } else if (st.f_type != RELAYFS_TYPE) {
792 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
797 if (open_devices() != 0)
800 if (kill_running_trace) {
805 setlocale(LC_NUMERIC, "en_US");
807 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
809 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
813 if (start_devices() != 0)
816 signal(SIGINT, handle_sigint);
817 signal(SIGHUP, handle_sigint);
818 signal(SIGTERM, handle_sigint);
819 signal(SIGALRM, handle_sigint);
821 atexit(stop_all_tracing);