2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
/* Version string shown by the version option and in the usage banner. */
41 static char blktrace_version[] = "0.99";
44 * You may want to increase this even more, if you are logging at a high
45 * rate and see skipped/missed events
/* Initial value of the buf_size global, which start_trace() copies into buts.buf_size. */
47 #define BUF_SIZE (512 * 1024)
/*
 * Assigned to buf_size in start_threads() right after the fopen() of an
 * output file, before the stdio buffer handed to setvbuf() is allocated.
 */
50 #define OFILE_BUF (128 * 1024)
/* Value main() expects in statfs(relay_path).f_type; anything else is reported as not a relay filesystem. */
52 #define RELAYFS_TYPE 0xF0B4A981
/* Short-option string passed to getopt_long() in main(); a trailing ':' marks an option that takes an argument. */
54 #define S_OPTS "d:a:A:r:o:kw:Vb:n:D:"
55 static struct option l_opts[] = {
58 .has_arg = required_argument,
64 .has_arg = required_argument,
70 .has_arg = required_argument,
76 .has_arg = required_argument,
82 .has_arg = required_argument,
88 .has_arg = no_argument,
94 .has_arg = required_argument,
100 .has_arg = no_argument,
105 .name = "buffer-size",
106 .has_arg = required_argument,
111 .name = "num-sub-buffers",
112 .has_arg = required_argument,
117 .name = "output-dir",
118 .has_arg = required_argument,
127 struct thread_information {
132 char fn[MAXPATHLEN + 64];
134 unsigned long buf_offset;
135 unsigned int buf_subbuf;
136 unsigned int sequence;
138 pthread_mutex_t *fd_lock;
144 unsigned long events_processed;
145 struct device_information *device;
148 struct device_information {
152 volatile int trace_started;
153 struct thread_information *threads;
/*
 * Flat array of per-CPU thread slots for all devices: start_devices()
 * allocates ncpus * ndevs entries and points each device's 'threads'
 * member at its own run of ncpus entries.
 */
157 static struct thread_information *thread_information;
/* Array of devices to trace; resize_devices() grows it by one entry per device path. */
159 static struct device_information *device_information;
161 /* command line option globals */
/* Relay filesystem path; main() can set it to the built-in default "/relay" and validates it with statfs(). */
162 static char *relay_path;
/* Output name taken from the command line; compared against "-" to detect output to stdout. */
163 static char *output_name;
/* Directory that start_threads() puts in front of the output file name. */
164 static char *output_dir;
/* Action mask sent to the kernel in buts.act_mask; main() overwrites it when act_mask_tmp is non-zero. */
165 static int act_mask = ~0U;
/* When set, stop_trace() issues BLKSTOPTRACE even if this process has not marked the device as tracing. */
166 static int kill_running_trace;
/* Values copied into buts.buf_size / buts.buf_nr by start_trace(); main() may replace them from option arguments. */
167 static unsigned int buf_size = BUF_SIZE;
168 static unsigned int buf_nr = BUF_NR;
/* Stop flag polled by the read loop in read_data(); readers go through is_done(), which forces a volatile load. */
170 #define is_done() (*(volatile int *)(&done))
171 static volatile int done;
/* Lock installed as tip->fd_lock for threads whose output stream is stdout. */
173 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Forward declaration; the definition appears later in the file. */
175 static void exit_trace(int status);
/*
 * Flag accessors for a thread (tip) and a device (dip).
 * The read macros load the field through a volatile-qualified int lvalue;
 * the write macros are plain assignments.
 */
/* Read tip->closed. */
177 #define tip_closed(tip) (*(volatile int *)(&(tip)->closed))
/* Set tip->closed to 1. */
178 #define set_tip_closed(tip) ((tip)->closed = 1)
/* Read dip->trace_started (set to 1 by start_trace(), cleared by stop_trace()). */
180 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
/* Store v into dip->trace_started. */
181 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
/*
 * Iteration helpers over the global device array and a device's threads.
 *
 * Every macro argument is parenthesized at each use so that expression
 * arguments (for example a conditional expression as the bound) expand
 * with the intended precedence. For plain identifiers the expansion is
 * equivalent to the unparenthesized form.
 */

/*
 * Walk the first end_ entries of device_information.
 *   dip_ - pointer lvalue used as the cursor
 *   idx_ - integer lvalue used as the loop counter
 *   end_ - number of entries to visit
 */
#define __for_each_dip(dip_, idx_, end_)				\
	for ((idx_) = 0, (dip_) = device_information;			\
	     (idx_) < (end_); (idx_)++, (dip_)++)

/* Walk all ndevs entries of device_information. */
#define for_each_dip(dip_, idx_)	__for_each_dip(dip_, idx_, ndevs)

/*
 * Walk the ncpus thread slots starting at (dip_)->threads.
 *   dip_ - device whose threads are visited
 *   tip_ - pointer lvalue used as the cursor
 *   idx_ - integer lvalue used as the loop counter
 */
#define for_each_tip(dip_, tip_, idx_)					\
	for ((idx_) = 0, (tip_) = (dip_)->threads;			\
	     (idx_) < ncpus; (idx_)++, (tip_)++)
190 static int start_trace(struct device_information *dip)
192 struct blk_user_trace_setup buts;
194 memset(&buts, 0, sizeof(buts));
195 buts.buf_size = buf_size;
196 buts.buf_nr = buf_nr;
197 buts.act_mask = act_mask;
199 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
200 perror("BLKSTARTTRACE");
204 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
205 dip_set_tracing(dip, 1);
209 static void stop_trace(struct device_information *dip)
211 if (dip_tracing(dip) || kill_running_trace) {
212 dip_set_tracing(dip, 0);
214 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
215 perror("BLKSTOPTRACE");
222 static void stop_all_traces(void)
224 struct device_information *dip;
231 static int read_data(struct thread_information *tip, void *buf, int len)
234 int ret, bytes_left = len;
236 while (!is_done() && bytes_left > 0) {
237 ret = read(tip->fd, p, bytes_left);
238 if (ret == bytes_left)
243 fprintf(stderr,"Thread %d failed read of %s\n",
246 } else if (ret > 0) {
256 static int write_data(FILE *file, void *buf, unsigned int buf_len)
261 bytes_left = buf_len;
262 while (bytes_left > 0) {
263 ret = fwrite(p, bytes_left, 1, file);
276 static void *extract_data(struct thread_information *tip, int nb)
281 if (!read_data(tip, buf, nb))
289 * trace may start inside 'bit' or may need to be gotten further on
291 static int get_event_slow(struct thread_information *tip,
292 struct blk_io_trace *bit)
294 const int inc = sizeof(__u32);
295 struct blk_io_trace foo;
300 * check if trace is inside
304 while (offset < sizeof(*bit)) {
308 memcpy(&foo, p, inc);
310 if (CHECK_MAGIC(&foo))
315 * partial trace found inside, read the rest
317 if (offset < sizeof(*bit)) {
318 int good_bytes = sizeof(*bit) - offset;
320 memmove(bit, p, good_bytes);
321 p = (void *) bit + good_bytes;
323 return read_data(tip, p, offset);
327 * nothing found, keep looking for start of trace
330 if (read_data(tip, bit, sizeof(bit->magic)))
332 } while (!CHECK_MAGIC(bit));
335 * now get the rest of it
338 if (!read_data(tip, p, sizeof(*bit) - inc))
345 * Sometimes relayfs screws us a little, if an event crosses a sub buffer
346 * boundary. So keep looking forward in the trace data until an event
349 static int get_event(struct thread_information *tip, struct blk_io_trace *bit)
352 * optimize for the common fast case, a full trace read that
355 if (read_data(tip, bit, sizeof(*bit)))
358 if (CHECK_MAGIC(bit))
362 * ok that didn't work, the event may start somewhere inside the
365 return get_event_slow(tip, bit);
368 static inline void tip_fd_unlock(struct thread_information *tip)
371 pthread_mutex_unlock(tip->fd_lock);
374 static inline void tip_fd_lock(struct thread_information *tip)
377 pthread_mutex_lock(tip->fd_lock);
380 static void *extract(void *arg)
382 struct thread_information *tip = arg;
385 struct blk_io_trace t;
386 pid_t pid = getpid();
390 CPU_SET((tip->cpu), &cpu_mask);
392 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
393 perror("sched_setaffinity");
397 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
398 relay_path, tip->device->buts_name, tip->cpu);
399 tip->fd = open(tip->fn, O_RDONLY);
402 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
409 if (get_event(tip, &t))
412 if (verify_trace(&t))
420 pdu_data = extract_data(tip, pdu_len);
426 * now we have both trace and payload, get a lock on the
427 * output descriptor and send it off
431 if (write_data(tip->ofile, &t, sizeof(t))) {
436 if (pdu_data && write_data(tip->ofile, pdu_data, pdu_len)) {
448 tip->events_processed++;
455 static void close_thread(struct thread_information *tip)
466 if (tip->ofile_buffer)
467 free(tip->ofile_buffer);
471 tip->ofile_buffer = NULL;
474 static int start_threads(struct device_information *dip)
476 struct thread_information *tip;
478 int j, pipeline = output_name && !strcmp(output_name, "-");
481 for_each_tip(dip, tip, j) {
485 tip->events_processed = 0;
488 tip->ofile = fdopen(STDOUT_FILENO, "w");
489 tip->fd_lock = &stdout_mutex;
496 len = sprintf(op, "%s/", output_dir);
499 sprintf(op + len, "%s.blktrace.%d", output_name,
502 sprintf(op + len, "%s.blktrace.%d",
503 dip->buts_name, tip->cpu);
505 tip->ofile = fopen(op, "w");
507 buf_size = OFILE_BUF;
510 if (tip->ofile == NULL) {
515 tip->ofile_buffer = malloc(buf_size);
516 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, buf_size)) {
522 if (pthread_create(&tip->thread, NULL, extract, tip)) {
523 perror("pthread_create");
532 static void stop_threads(struct device_information *dip)
534 struct thread_information *tip;
538 for_each_tip(dip, tip, i) {
539 if (pthread_join(tip->thread, (void *) &ret))
540 perror("thread_join");
546 static void stop_all_threads(void)
548 struct device_information *dip;
555 static void stop_all_tracing(void)
557 struct device_information *dip;
558 struct thread_information *tip;
561 for_each_dip(dip, i) {
562 for_each_tip(dip, tip, j)
569 static void exit_trace(int status)
575 static int resize_devices(char *path)
577 int size = (ndevs + 1) * sizeof(struct device_information);
579 device_information = realloc(device_information, size);
580 if (!device_information) {
581 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
584 device_information[ndevs].path = path;
589 static int open_devices(void)
591 struct device_information *dip;
594 for_each_dip(dip, i) {
595 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
605 static int start_devices(void)
607 struct device_information *dip;
610 size = ncpus * sizeof(struct thread_information);
611 thread_information = malloc(size * ndevs);
612 if (!thread_information) {
613 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
617 for_each_dip(dip, i) {
618 if (start_trace(dip)) {
620 fprintf(stderr, "Failed to start trace on %s\n",
627 __for_each_dip(dip, j, i)
633 for_each_dip(dip, i) {
634 dip->threads = thread_information + (i * ncpus);
635 if (start_threads(dip)) {
636 fprintf(stderr, "Failed to start worker threads\n");
642 __for_each_dip(dip, j, i)
653 static int get_dropped_count(const char *buts_name)
656 char tmp[MAXPATHLEN + 64];
658 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
659 relay_path, buts_name);
661 fd = open(tmp, O_RDONLY);
664 * this may be ok, if the kernel doesn't support dropped counts
669 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
673 if (read(fd, tmp, sizeof(tmp)) < 0) {
684 static void show_stats(void)
687 struct device_information *dip;
688 struct thread_information *tip;
689 unsigned long long events_processed;
691 if (output_name && !strcmp(output_name, "-"))
694 for_each_dip(dip, i) {
695 printf("Device: %s\n", dip->path);
696 events_processed = 0;
697 for_each_tip(dip, tip, j) {
698 printf(" CPU%3d: %20ld events\n",
699 tip->cpu, tip->events_processed);
700 events_processed += tip->events_processed;
702 dropped = get_dropped_count(dip->buts_name);
703 printf(" Total: %20lld events (dropped %d)\n",
704 events_processed, dropped);
/*
 * Option summary printed by show_usage() after the program name and the
 * version string.
 *
 * The letters shown here have to agree with the getopt string S_OPTS
 * ("d:a:A:r:o:kw:Vb:n:D:"): the version switch is upper-case -V (a
 * lower-case -v is not an accepted option), and -D, -b and -n appear in
 * the synopsis as well as in the per-option descriptions.
 */
static char usage_str[] =
	"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n"
	"[ -a action ] [ -A action mask ] [ -D <dir> ]\n"
	"[ -b size ] [ -n number ] [ -V ]\n\n"
	"\t-d Use specified device. May also be given last after options\n"
	"\t-r Path to mounted relayfs, defaults to /relay\n"
	"\t-o File(s) to send output to\n"
	"\t-D Directory to prepend to output file names\n"
	"\t-k Kill a running trace\n"
	"\t-w Stop after defined time, in seconds\n"
	"\t-a Only trace specified actions. See documentation\n"
	"\t-A Give trace mask as a single value. See documentation\n"
	"\t-b Sub buffer size in KiB\n"
	"\t-n Number of sub buffers\n"
	"\t-V Print program version info\n\n";
723 static void show_usage(char *program)
725 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
728 static void handle_sigint(__attribute__((__unused__)) int sig)
734 int main(int argc, char *argv[])
736 static char default_relay_path[] = "/relay";
740 int act_mask_tmp = 0;
742 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
745 i = find_mask_map(optarg);
747 fprintf(stderr,"Invalid action mask %s\n",
755 if ((sscanf(optarg, "%x", &i) != 1) ||
758 "Invalid set action mask %s/0x%x\n",
766 if (resize_devices(optarg) != 0)
775 output_name = optarg;
778 kill_running_trace = 1;
781 stop_watch = atoi(optarg);
782 if (stop_watch <= 0) {
784 "Invalid stopwatch value (%d secs)\n",
790 printf("%s version %s\n", argv[0], blktrace_version);
793 buf_size = atoi(optarg);
794 if (buf_size <= 0 || buf_size > 16*1024) {
796 "Invalid buffer size (%d)\n", buf_size);
802 buf_nr = atoi(optarg);
805 "Invalid buffer nr (%d)\n", buf_nr);
818 while (optind < argc) {
819 if (resize_devices(argv[optind++]) != 0)
829 relay_path = default_relay_path;
831 if (act_mask_tmp != 0)
832 act_mask = act_mask_tmp;
834 if (statfs(relay_path, &st) < 0) {
836 fprintf(stderr,"%s does not appear to be a valid path\n",
839 } else if (st.f_type != (long) RELAYFS_TYPE) {
840 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
845 if (open_devices() != 0)
848 if (kill_running_trace) {
853 setlocale(LC_NUMERIC, "en_US");
855 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
857 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
861 if (start_devices() != 0)
864 signal(SIGINT, handle_sigint);
865 signal(SIGHUP, handle_sigint);
866 signal(SIGTERM, handle_sigint);
867 signal(SIGALRM, handle_sigint);
869 atexit(stop_all_tracing);