2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
/* Version string; show_usage() and the version message in main() print it. */
44 static char blktrace_version[] = "0.99";
47 * You may want to increase this even more, if you are logging at a high
48 * rate and see skipped/missed events
/* Initial value of the buf_size global, which start_trace() hands to the kernel. */
50 #define BUF_SIZE (512 * 1024)
/* start_threads() assigns this to the buffer size it passes to setvbuf(). */
53 #define OFILE_BUF (128 * 1024)
/* main() compares the statfs() f_type of relay_path against this value. */
55 #define RELAYFS_TYPE 0xF0B4A981
/* NOTE(review): no use of the two RING_* limits is visible in this listing. */
57 #define RING_INIT_NR (2)
58 #define RING_MAX_NR (16UL)
/* Short-option string that main() passes to getopt_long(). */
60 #define S_OPTS "d:a:A:r:o:kw:Vb:n:D:"
/*
 * Long-option table that main() passes to getopt_long() together with S_OPTS.
 * NOTE(review): most of the .name/.val initializers are not visible in this
 * listing; only the has_arg fields and three option names can be read here.
 */
61 static struct option l_opts[] = {
64 .has_arg = required_argument,
70 .has_arg = required_argument,
76 .has_arg = required_argument,
82 .has_arg = required_argument,
88 .has_arg = required_argument,
94 .has_arg = no_argument,
100 .has_arg = required_argument,
106 .has_arg = no_argument,
111 .name = "buffer-size",
112 .has_arg = required_argument,
117 .name = "num-sub-buffers",
118 .has_arg = required_argument,
123 .name = "output-dir",
124 .has_arg = required_argument,
/*
 * Two members of a sub-buffer record. get_subbuf() links `list` onto
 * tip->subbuf_list and stores the allocated buffer size in `max_len`, both
 * through a struct tip_subbuf pointer.
 * NOTE(review): the struct header and its other members (buf and len are
 * used elsewhere) are not visible in this listing.
 */
134 struct list_head list;
137 unsigned int max_len;
/*
 * Per-worker state; for_each_tip() walks ncpus of these for each device.
 * NOTE(review): several members used elsewhere in the file (fd, cpu, ofile,
 * thread, exited, the leftover_ts fields, ...) are declared on lines that are
 * not visible in this listing.
 */
140 struct thread_information {
/* NOTE(review): no use of the three fd_* members is visible in this listing. */
146 unsigned long fd_off;
147 unsigned long fd_size;
148 unsigned long fd_max_size;
/* Path of the relay trace file; thread_main() formats it and opens it. */
149 char fn[MAXPATHLEN + 64];
/* Number of trace records handled; flush_subbuf() increments it per record. */
155 unsigned long events_processed;
/* Device this worker belongs to; thread_main() reads its buts_name. */
156 struct device_information *device;
/* Mutex taken by tip_fd_lock() and released by tip_fd_unlock(). */
160 pthread_mutex_t lock;
/* List of struct tip_subbuf: get_subbuf() adds at the tail, write_tip_events() reads the first entry. */
161 struct list_head subbuf_list;
/*
 * Per-device state; for_each_dip() walks the global array of these.
 * NOTE(review): members such as fd, path and buts_name are used elsewhere in
 * the file but declared on lines that are not visible in this listing.
 */
167 struct device_information {
/* Tracing flag, accessed through dip_tracing() / dip_set_tracing(). */
171 volatile int trace_started;
/* Filled from get_dropped_count() in stop_all_traces(); printed by show_stats(). */
172 unsigned long drop_count;
/* This device's ncpus-entry slice of thread_information, set in start_devices(). */
173 struct thread_information *threads;
/*
 * One allocation holding every worker's state: start_devices() mallocs
 * ncpus * ndevs entries and points each device at its own ncpus-sized slice.
 */
177 static struct thread_information *thread_information;
/* Array of traced devices; resize_devices() reallocs it to ndevs + 1 entries. */
179 static struct device_information *device_information;
181 /* command line option globals */
/* Directory under which "block/<name>/..." relay files are opened; main() can set it to "/relay". */
182 static char *relay_path;
/* Output base name; the value "-" selects stdout in start_threads() and show_stats(). */
183 static char *output_name;
/* start_threads() formats this, followed by '/', at the front of the output file name. */
184 static char *output_dir;
/*
 * Action mask copied into the BLKSTARTTRACE setup; main() replaces it when
 * act_mask_tmp is non-zero.
 * NOTE(review): ~0U is unsigned, so storing it in an int relies on an
 * implementation-defined conversion (all bits set, i.e. -1, on common ABIs).
 */
185 static int act_mask = ~0U;
/* When set, stop_trace() issues BLKSTOPTRACE even if this run did not start the trace. */
186 static int kill_running_trace;
/* Sub-buffer size and count copied into the BLKSTARTTRACE setup by start_trace(). */
187 static unsigned long buf_size = BUF_SIZE;
/* NOTE(review): BUF_NR has no visible definition in this listing - presumably on an omitted line; confirm. */
188 static unsigned long buf_nr = BUF_NR;
/*
 * Run-state flags. Each is_*() macro reads its flag through a
 * volatile-qualified lvalue, so every use performs a fresh load.
 * NOTE(review): volatile gives neither atomicity nor inter-thread ordering;
 * if these flags are shared between the worker threads and signal handlers,
 * C11 atomics or sig_atomic_t would state the intent more precisely - confirm
 * how they are written (the writers are not visible in this listing).
 */
190 #define is_done() (*(volatile int *)(&done))
191 static volatile int done;
193 #define is_trace_stopped() (*(volatile int *)(&trace_stopped))
194 static volatile int trace_stopped;
196 #define is_stat_shown() (*(volatile int *)(&stat_shown))
197 static volatile int stat_shown;
/* Forward declaration; the definition appears later in the file. */
199 static void exit_trace(int status);
/* Read / write a device's trace_started flag. */
201 #define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
202 #define dip_set_tracing(dip, v) ((dip)->trace_started = (v))
/*
 * Loop header: walks __d over the first __e entries of device_information,
 * with __i as the running index. __i and __e are expanded without
 * parentheses, so pass plain variables or constants.
 */
204 #define __for_each_dip(__d, __i, __e) \
205 for (__i = 0, __d = device_information; __i < __e; __i++, __d++)
/* Walk all ndevs devices. */
207 #define for_each_dip(__d, __i) __for_each_dip(__d, __i, ndevs)
/* Walk the ncpus thread slots of device __d, with __j as the running index. */
208 #define for_each_tip(__d, __t, __j) \
209 for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
/*
 * Fetch the dropped-event information for the trace named buts_name.
 *
 * The visible code formats "<relay_path>/block/<buts_name>/dropped" into
 * tmp, opens that file read-only, reports a failed open on stderr, and then
 * reads the file contents back into the same tmp buffer.
 *
 * NOTE(review): read() is given sizeof(tmp), so a full-length read leaves no
 * room for a terminating NUL if the text is later parsed as a string. How
 * the data is parsed, whether fd is closed, and the return values are on
 * lines that are not visible in this listing.
 */
211 static int get_dropped_count(const char *buts_name)
214 char tmp[MAXPATHLEN + 64];
216 snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
217 relay_path, buts_name);
219 fd = open(tmp, O_RDONLY);
222 * this may be ok, if the kernel doesn't support dropped counts
227 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
231 if (read(fd, tmp, sizeof(tmp)) < 0) {
/*
 * Start a block trace on dip.
 *
 * Fills a zeroed blk_user_trace_setup from the buf_size, buf_nr and act_mask
 * globals and issues BLKSTARTTRACE on dip->fd; an ioctl failure is reported
 * with perror(). On the success path the name found in buts.name is copied
 * into dip->buts_name and the device is flagged as tracing.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * start_devices() treats a non-zero result as failure.
 */
242 static int start_trace(struct device_information *dip)
244 struct blk_user_trace_setup buts;
246 memset(&buts, 0, sizeof(buts));
247 buts.buf_size = buf_size;
248 buts.buf_nr = buf_nr;
249 buts.act_mask = act_mask;
251 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
252 perror("BLKSTARTTRACE");
/*
 * buts.name was zeroed above and is never written here, so it is presumably
 * filled in by the ioctl. The copy length is sizeof(dip->buts_name), which
 * assumes buts.name is at least that large - confirm against both declarations.
 */
256 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
257 dip_set_tracing(dip, 1);
/*
 * Stop the trace on dip if it is flagged as tracing, or unconditionally when
 * kill_running_trace is set. The flag is cleared before BLKSTOPTRACE is
 * issued; an ioctl failure is only reported through perror().
 * NOTE(review): any further cleanup is on lines not visible in this listing.
 */
261 static void stop_trace(struct device_information *dip)
263 if (dip_tracing(dip) || kill_running_trace) {
264 dip_set_tracing(dip, 0);
266 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
267 perror("BLKSTOPTRACE");
/*
 * Walk every device and record its dropped-event count.
 * NOTE(review): get_dropped_count() returns int but drop_count is unsigned
 * long, so a negative return would be stored as a very large count. Whether
 * each trace is also stopped here is on a line not visible in this listing.
 */
274 static void stop_all_traces(void)
276 struct device_information *dip;
279 for_each_dip(dip, i) {
280 dip->drop_count = get_dropped_count(dip->buts_name);
/*
 * Wait for tip->fd to become readable. A pollfd asking for POLLIN is set up
 * and the loop repeats until is_done(); inside it the code tests for POLLIN
 * in revents and separately tests tip->ofile_stdout.
 * NOTE(review): the poll() call itself and the actions taken by the two
 * tests are on lines not visible in this listing.
 */
285 static void wait_for_data(struct thread_information *tip)
287 struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
291 if (pfd.revents & POLLIN)
293 if (tip->ofile_stdout)
295 } while (!is_done());
/*
 * Read up to len bytes from tip->fd into buf, looping until is_done().
 * An errno other than EAGAIN is reported on stderr as a failed read.
 * NOTE(review): the handling of short reads, EAGAIN, and the return values
 * is on lines not visible in this listing.
 */
298 static int read_data(struct thread_information *tip, void *buf, int len)
305 ret = read(tip->fd, buf, len);
311 if (errno != EAGAIN) {
313 fprintf(stderr,"Thread %d failed read of %s\n",
319 } while (!is_done());
/* Release tip->lock; the pthread_mutex_unlock() result is not checked. */
324 static inline void tip_fd_unlock(struct thread_information *tip)
326 pthread_mutex_unlock(&tip->lock);
/* Acquire tip->lock; the pthread_mutex_lock() result is not checked. */
329 static inline void tip_fd_lock(struct thread_information *tip)
331 pthread_mutex_lock(&tip->lock);
/*
 * Allocate a tip_subbuf with a ts_size-byte buffer, fill the buffer through
 * read_data() and add the record at the tail of tip->subbuf_list.
 *
 * NOTE(review): ts is dereferenced on the line directly after malloc() with
 * no NULL check, and no check of ts->buf is visible before read_data()
 * writes into it. How ts_size is chosen (it appears to depend on
 * tip->ofile_stdout), whether the list insertion is done under tip->lock,
 * and what is returned are on lines not visible in this listing.
 */
334 static int get_subbuf(struct thread_information *tip)
336 struct tip_subbuf *ts;
340 * live tracing should get a lower count to make it more "realtime"
342 if (tip->ofile_stdout)
347 ts = malloc(sizeof(*ts));
348 ts->buf = malloc(ts_size);
/* Remember the allocated size; flush_subbuf() compares against it before growing buf. */
349 ts->max_len = ts_size;
351 ret = read_data(tip, ts->buf, ts_size);
355 list_add_tail(&ts->list, &tip->subbuf_list);
/*
 * Release per-thread resources. The visible lines free tip->ofile_buffer and
 * reset the pointer to NULL.
 * NOTE(review): the NULL test before free() is redundant, since free(NULL)
 * is a no-op. Closing of the descriptor and the output stream is presumably
 * on lines not visible in this listing - confirm.
 */
365 static void close_thread(struct thread_information *tip)
371 if (tip->ofile_buffer)
372 free(tip->ofile_buffer);
378 tip->ofile_buffer = NULL;
/*
 * Worker entry point handed to pthread_create() by start_threads(); arg is
 * the worker's thread_information.
 *
 * The visible code sets an affinity mask containing only tip->cpu, formats
 * "<relay_path>/block/<buts_name>/trace<cpu>" into tip->fn and opens that
 * file read-only; a failed open is reported on stderr.
 *
 * NOTE(review): the affinity call passes getpid(). Per sched_setaffinity(2),
 * the process ID designates the main thread of the thread group, while 0
 * designates the calling thread, so when this runs in a worker thread the
 * mask may be applied to the main thread instead of to this worker - confirm
 * the intended target. The main read loop and the return value are on lines
 * not visible in this listing.
 */
382 static void *thread_main(void *arg)
384 struct thread_information *tip = arg;
385 pid_t pid = getpid();
389 CPU_SET((tip->cpu), &cpu_mask);
391 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
392 perror("sched_setaffinity");
396 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
397 relay_path, tip->device->buts_name, tip->cpu);
398 tip->fd = open(tip->fn, O_RDONLY);
401 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
/*
 * Write buf_len bytes from buf to tip->ofile as a single fwrite() item, so
 * fwrite() returns 1 on a complete write and 0 otherwise.
 * NOTE(review): the handling of the fwrite() result, the action taken when
 * tip->ofile_stdout is set, and the return values are on lines not visible
 * in this listing. flush_subbuf() tests the result for non-zero.
 */
415 static int write_data(struct thread_information *tip,
416 void *buf, unsigned int buf_len)
421 ret = fwrite(buf, buf_len, 1, tip->ofile);
431 if (tip->ofile_stdout)
/*
 * Write out the complete trace records held in sub-buffer ts.
 *
 * Steps visible in this listing:
 *  1. If bytes were left over from the previous call, grow ts->buf when
 *     needed, shift the new data up and copy the leftover bytes in front.
 *  2. Walk the buffer record by record; a record is a struct blk_io_trace
 *     header followed by pdu_len payload bytes. Each complete record goes to
 *     write_data() and bumps tip->events_processed.
 *  3. Bytes after the last complete record are copied into tip->leftover_ts
 *     for the next call.
 *
 * NOTE(review): both realloc() calls assign straight back to the only
 * pointer, so the old block would be lost on failure, and no failure check
 * is visible before the following copies. ts->max_len is not visibly updated
 * after ts->buf grows. The actions taken on a truncated record or a
 * write_data() failure, the cleanup of ts and the return value are on lines
 * not visible in this listing.
 */
437 static int flush_subbuf(struct thread_information *tip, struct tip_subbuf *ts)
439 unsigned int offset = 0;
440 struct blk_io_trace *t;
441 int pdu_len, events = 0;
444 * surplus from last run, a little ugly...
446 if (tip->leftover_ts_len) {
447 if (ts->len + tip->leftover_ts_len > ts->max_len)
448 ts->buf = realloc(ts->buf, ts->len + tip->leftover_ts_len);
/* memmove, not memcpy: source and destination overlap within ts->buf. */
450 memmove(ts->buf + tip->leftover_ts_len, ts->buf, ts->len);
451 memcpy(ts->buf, tip->leftover_ts, tip->leftover_ts_len);
452 ts->len += tip->leftover_ts_len;
453 tip->leftover_ts_len = 0;
/* Continue only while a whole record header still fits in the remaining bytes. */
456 while (offset + sizeof(*t) <= ts->len) {
457 t = ts->buf + offset;
462 pdu_len = t->pdu_len;
/* Header present but payload incomplete: the record is cut off at the buffer end. */
464 if (offset + sizeof(*t) + pdu_len > ts->len)
469 if (write_data(tip, t, sizeof(*t) + pdu_len))
472 offset += sizeof(*t) + pdu_len;
473 tip->events_processed++;
478 * leftover bytes, save them for next time
480 if (offset != ts->len) {
481 int surplus = ts->len - offset;
483 t = ts->buf + offset;
/* Grow the leftover buffer only when the current tail is larger than any seen before. */
484 if (surplus > tip->leftover_ts_max) {
485 tip->leftover_ts = realloc(tip->leftover_ts, surplus);
486 tip->leftover_ts_max = surplus;
489 memcpy(tip->leftover_ts, ts->buf + offset, surplus);
490 tip->leftover_ts_len = surplus;
/*
 * If tip->subbuf_list is not empty, pick its first tip_subbuf and pass it to
 * flush_subbuf(), returning that result.
 * NOTE(review): removal of the entry from the list, any locking around the
 * list access, and the result for an empty list are on lines not visible in
 * this listing.
 */
498 static int write_tip_events(struct thread_information *tip)
500 struct tip_subbuf *ts = NULL;
503 if (!list_empty(&tip->subbuf_list)) {
504 ts = list_entry(tip->subbuf_list.next, struct tip_subbuf, list);
510 return flush_subbuf(tip, ts);
516 * scans the tips we know and writes out the subbuffers we accumulate
/*
 * Drain queued sub-buffers for every worker of every device by calling
 * write_tip_events(). Two passes over the devices and threads are visible;
 * the second sits in a do/while loop that repeats while events is non-zero
 * and all_exited is zero.
 *
 * NOTE(review): despite its name, all_exited is incremented for each thread
 * whose exited flag is NOT set, so the second loop only repeats once no
 * thread is still running. The loop around the first pass and the
 * accumulation of events are on lines not visible in this listing.
 */
518 static void get_and_write_events(void)
520 struct device_information *dip;
521 struct thread_information *tip;
522 int i, j, events, ret, all_exited;
527 for_each_dip(dip, i) {
528 for_each_tip(dip, tip, j) {
529 ret = write_tip_events(tip);
545 for_each_dip(dip, i) {
546 for_each_tip(dip, tip, j) {
547 ret = write_tip_events(tip);
550 all_exited += !tip->exited;
554 } while (events && !all_exited);
/*
 * Set up and launch the ncpus workers of device dip.
 *
 * For each worker the visible code resets the counters, the sub-buffer list
 * and the leftover buffer, selects the output stream, attaches a stdio
 * buffer with setvbuf() and creates the thread running thread_main().
 * Output is either a stream opened on STDOUT_FILENO or a file named
 * "<name>.blktrace.<cpu>", where <name> is output_name or dip->buts_name,
 * with "<output_dir>/" formatted in front.
 *
 * NOTE(review): the file name is built with unbounded sprintf() into op,
 * whose declaration and size are not visible here. fdopen(STDOUT_FILENO)
 * sits inside the per-thread loop, so each worker taking that path gets its
 * own FILE on the same descriptor. The malloc() result is handed to
 * setvbuf() unchecked (a NULL buffer makes stdio allocate one itself). The
 * branch conditions, the value of mode and the return values are on lines
 * not visible in this listing.
 */
557 static int start_threads(struct device_information *dip)
559 struct thread_information *tip;
/* pipeline is non-zero when output_name is exactly "-". */
561 int j, pipeline = output_name && !strcmp(output_name, "-");
562 int len, mode, vbuf_size;
564 for_each_tip(dip, tip, j) {
567 tip->events_processed = 0;
568 INIT_LIST_HEAD(&tip->subbuf_list);
569 tip->leftover_ts_len = 0;
570 tip->leftover_ts_max = 0;
571 tip->leftover_ts = NULL;
574 tip->ofile = fdopen(STDOUT_FILENO, "w");
575 tip->ofile_stdout = 1;
582 len = sprintf(op, "%s/", output_dir);
585 sprintf(op + len, "%s.blktrace.%d", output_name,
588 sprintf(op + len, "%s.blktrace.%d",
589 dip->buts_name, tip->cpu);
591 tip->ofile = fopen(op, "w");
592 tip->ofile_stdout = 0;
594 vbuf_size = OFILE_BUF;
597 if (tip->ofile == NULL) {
602 tip->ofile_buffer = malloc(vbuf_size);
603 if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
609 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
610 perror("pthread_create");
/*
 * Join every worker thread of dip; the pthread_join() result is discarded.
 * NOTE(review): pthread_join() stores a void pointer through its second
 * argument. The declaration of ret is not visible here; if ret is narrower
 * than a pointer, the cast hides an out-of-bounds store - confirm its type.
 */
619 static void stop_threads(struct device_information *dip)
621 struct thread_information *tip;
625 for_each_tip(dip, tip, i) {
626 (void) pthread_join(tip->thread, (void *) &ret);
/*
 * NOTE(review): only the signature and one local are visible in this
 * listing; by its name and the dip local it presumably applies
 * stop_threads() to every device - confirm against the full source.
 */
631 static void stop_all_threads(void)
633 struct device_information *dip;
/*
 * Registered by main() with atexit(), so it runs at normal process exit.
 * NOTE(review): the body is not visible in this listing; by its name and the
 * dip local it presumably stops the trace on every device - confirm.
 */
640 static void stop_all_tracing(void)
642 struct device_information *dip;
/*
 * Shut down with the given status. The visible code performs part of its
 * work only when the trace has not already been marked as stopped.
 * NOTE(review): what is done inside that guard and how status is used are
 * on lines not visible in this listing.
 */
649 static void exit_trace(int status)
651 if (!is_trace_stopped()) {
/*
 * Grow the device_information array to ndevs + 1 entries and store path in
 * the new last slot. The pointer itself is kept, not a copy of the string,
 * so path must outlive the array (main() passes optarg and argv entries).
 *
 * NOTE(review): realloc() assigns straight back to the global, so on failure
 * the previous array pointer is lost; the failure itself is reported on
 * stderr. The byte count is held in an int. The ndevs increment and the
 * return values are on lines not visible here; main() treats non-zero as
 * failure.
 */
660 static int resize_devices(char *path)
662 int size = (ndevs + 1) * sizeof(struct device_information);
664 device_information = realloc(device_information, size);
665 if (!device_information) {
666 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
669 device_information[ndevs].path = path;
/*
 * Open every device path read-only and non-blocking, storing the descriptor
 * in dip->fd.
 * NOTE(review): the handling of a failed open() and the return values are on
 * lines not visible in this listing; main() treats non-zero as failure.
 */
674 static int open_devices(void)
676 struct device_information *dip;
679 for_each_dip(dip, i) {
680 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
/*
 * Bring up tracing on all devices in two phases.
 *
 *  1. Allocate one thread_information array of ncpus * ndevs entries, then
 *     call start_trace() on each device.
 *  2. Point each device at its ncpus-sized slice of that array and call
 *     start_threads() on it.
 *
 * When either phase fails at device index i, a message is printed and
 * __for_each_dip(dip, j, i) revisits the first i devices, i.e. those already
 * handled in that phase.
 *
 * NOTE(review): the statements run by the two cleanup loops and the return
 * values are on lines not visible in this listing. The out-of-memory message
 * prints size * ndevs with %d; the declaration of size is not visible, so
 * confirm that the format matches its type.
 */
690 static int start_devices(void)
692 struct device_information *dip;
695 size = ncpus * sizeof(struct thread_information);
696 thread_information = malloc(size * ndevs);
697 if (!thread_information) {
698 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
702 for_each_dip(dip, i) {
703 if (start_trace(dip)) {
705 fprintf(stderr, "Failed to start trace on %s\n",
712 __for_each_dip(dip, j, i)
718 for_each_dip(dip, i) {
/* Device i owns entries [i * ncpus, (i + 1) * ncpus) of the shared array. */
719 dip->threads = thread_information + (i * ncpus);
720 if (start_threads(dip)) {
721 fprintf(stderr, "Failed to start worker threads\n");
727 __for_each_dip(dip, j, i)
/*
 * Print per-device statistics: one line per CPU with that worker's event
 * count, then a per-device total with the dropped-event count. A hint about
 * a larger buffer size (-b) is written to stderr; its guarding condition is
 * not visible here.
 *
 * NOTE(review): tip->events_processed is unsigned long but printed with
 * %20ld, and the unsigned long long total is printed with %20lld; the
 * matching conversions are %lu and %llu. The initialisation of total_drops,
 * the use of no_stdout when output_name is "-", and any is_stat_shown()
 * guard are on lines not visible in this listing.
 */
738 static void show_stats(void)
740 struct device_information *dip;
741 struct thread_information *tip;
742 unsigned long long events_processed;
743 unsigned long total_drops;
744 int i, j, no_stdout = 0;
749 if (output_name && !strcmp(output_name, "-"))
755 for_each_dip(dip, i) {
757 printf("Device: %s\n", dip->path);
/* Per-device running total, reset for each device. */
758 events_processed = 0;
759 for_each_tip(dip, tip, j) {
761 printf(" CPU%3d: %20ld events\n",
762 tip->cpu, tip->events_processed);
763 events_processed += tip->events_processed;
765 total_drops += dip->drop_count;
767 printf(" Total: %20lld events (dropped %lu)\n",
768 events_processed, dip->drop_count);
772 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
/*
 * Help text appended to the usage line by show_usage().
 * NOTE(review): the text advertises "-v" for version info, but the S_OPTS
 * string accepted by getopt_long() contains only an upper-case 'V', so the
 * documented flag and the accepted flag disagree. The synopsis also omits
 * -D, -b and -n, which are described further down.
 */
775 static char usage_str[] = \
776 "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
777 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
778 "\t-d Use specified device. May also be given last after options\n" \
779 "\t-r Path to mounted relayfs, defaults to /relay\n" \
780 "\t-o File(s) to send output to\n" \
781 "\t-D Directory to prepend to output file names\n" \
782 "\t-k Kill a running trace\n" \
783 "\t-w Stop after defined time, in seconds\n" \
784 "\t-a Only trace specified actions. See documentation\n" \
785 "\t-A Give trace mask as a single value. See documentation\n" \
786 "\t-b Sub buffer size in KiB\n" \
787 "\t-n Number of sub buffers\n" \
788 "\t-v Print program version info\n\n";
/*
 * Print "Usage: <program> <version> <usage_str>" to stderr; program is the
 * name to show for the executable.
 */
790 static void show_usage(char *program)
792 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
/*
 * Signal handler that main() installs for SIGINT, SIGHUP, SIGTERM and
 * SIGALRM; the signal number is deliberately unused.
 * NOTE(review): the body is not visible in this listing.
 */
794 static void handle_sigint(__attribute__((__unused__)) int sig)
799 int main(int argc, char *argv[])
801 static char default_relay_path[] = "/relay";
805 int act_mask_tmp = 0;
807 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
810 i = find_mask_map(optarg);
812 fprintf(stderr,"Invalid action mask %s\n",
820 if ((sscanf(optarg, "%x", &i) != 1) ||
823 "Invalid set action mask %s/0x%x\n",
831 if (resize_devices(optarg) != 0)
840 output_name = optarg;
843 kill_running_trace = 1;
846 stop_watch = atoi(optarg);
847 if (stop_watch <= 0) {
849 "Invalid stopwatch value (%d secs)\n",
855 printf("%s version %s\n", argv[0], blktrace_version);
858 buf_size = strtoul(optarg, NULL, 10);
859 if (buf_size <= 0 || buf_size > 16*1024) {
861 "Invalid buffer size (%lu)\n",buf_size);
867 buf_nr = strtoul(optarg, NULL, 10);
870 "Invalid buffer nr (%lu)\n", buf_nr);
883 while (optind < argc) {
884 if (resize_devices(argv[optind++]) != 0)
894 relay_path = default_relay_path;
896 if (act_mask_tmp != 0)
897 act_mask = act_mask_tmp;
899 if (statfs(relay_path, &st) < 0) {
901 fprintf(stderr,"%s does not appear to be a valid path\n",
904 } else if (st.f_type != (long) RELAYFS_TYPE) {
905 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
910 if (open_devices() != 0)
913 if (kill_running_trace) {
918 setlocale(LC_NUMERIC, "en_US");
920 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
922 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
926 if (start_devices() != 0)
929 signal(SIGINT, handle_sigint);
930 signal(SIGHUP, handle_sigint);
931 signal(SIGTERM, handle_sigint);
932 signal(SIGALRM, handle_sigint);
934 atexit(stop_all_tracing);
939 get_and_write_events();
941 if (!is_trace_stopped()) {