2 * block queue tracing application
4 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/types.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
/* Program version; printed in the version message and the usage banner. */
41 static char blktrace_version[] = "0.90";
/* Default sub-buffer size in bytes (128 KiB); initial value of buf_size. */
43 #define BUF_SIZE (128 *1024)
/* Filesystem type that main() compares against statfs().f_type of relay_path. */
46 #define RELAYFS_TYPE 0xF0B4A981
/*
 * Expands to one mask_maps initializer: the BLK_TC_<mask> constant, the
 * bare name as a string, and the "BLK_TC_"-prefixed name as a string.
 */
48 #define DECLARE_MASK_MAP(mask) { BLK_TC_##mask, #mask, "BLK_TC_"#mask }
/* True when str equals the entry's short_form or long_form, ignoring case. */
49 #define COMPARE_MASK_MAP(mmp, str) \
50 (!strcasecmp((mmp)->short_form, (str)) || \
51 !strcasecmp((mmp)->long_form, (str)))
/* Range check for a numeric action mask: 1 <= x < (1 << BLK_TC_SHIFT). */
53 #define VALID_SET(x) ((1 <= (x)) && ((x) < (1 << BLK_TC_SHIFT)))
/*
 * Action categories that can be selected by name. Each entry is built by
 * DECLARE_MASK_MAP; find_mask_map() searches this table.
 */
61 static struct mask_map mask_maps[] = {
62 DECLARE_MASK_MAP(READ),
63 DECLARE_MASK_MAP(WRITE),
64 DECLARE_MASK_MAP(BARRIER),
65 DECLARE_MASK_MAP(SYNC),
66 DECLARE_MASK_MAP(QUEUE),
67 DECLARE_MASK_MAP(REQUEUE),
68 DECLARE_MASK_MAP(ISSUE),
69 DECLARE_MASK_MAP(COMPLETE),
/*
 * Short option string for getopt_long(); a letter followed by ':' takes an
 * argument (d, a, A, r, o, w, b, n), the others (k, v) do not.
 */
74 #define S_OPTS "d:a:A:r:o:kw:vb:n:"
/* Long option table handed to getopt_long() in main(). */
75 static struct option l_opts[] = {
78 .has_arg = required_argument,
84 .has_arg = required_argument,
90 .has_arg = required_argument,
96 .has_arg = required_argument,
102 .has_arg = required_argument,
108 .has_arg = no_argument,
114 .has_arg = required_argument,
120 .has_arg = no_argument,
/*
 * NOTE(review): this long option name and "nr of sub buffers" below contain
 * spaces, so they can only be passed as a single quoted argument
 * (e.g. "--buffer size=64"). Hyphenated names would be easier to type --
 * confirm whether the spaces are intentional.
 */
125 .name = "buffer size",
126 .has_arg = required_argument,
131 .name = "nr of sub buffers",
132 .has_arg = required_argument,
/*
 * State of one reader thread. start_devices() allocates ncpus of these per
 * device and start_threads() launches one thread for each.
 */
138 struct thread_information {
/* Path of the relay trace file for this CPU; built in extract(). */
143 char fn[MAXPATHLEN + 64];
/* Read position inside the mmap'ed buffer (see get_data_mmap()). */
145 unsigned long buf_offset;
/* Index of the sub-buffer currently being consumed. */
146 unsigned int buf_subbuf;
/* Sequence number of the last trace record accepted by get_data_mmap(). */
147 unsigned int sequence;
/* Mutex guarding the output descriptor; used by tip_fd_lock()/tip_fd_unlock(). */
149 pthread_mutex_t *fd_lock;
/* Number of records handled by this thread; summed in show_stats(). */
152 unsigned long events_processed;
/* Device this thread reads traces for. */
153 struct device_information *device;
/* State of one traced block device. */
156 struct device_information {
/* This device's slice of the global thread_information array (ncpus entries). */
161 struct thread_information *threads;
/* Backing array for all reader threads (ncpus * ndevs); allocated in start_devices(). */
165 static struct thread_information *thread_information;
/* Array of traced devices; grown one entry at a time by resize_devices(). */
167 static struct device_information *device_information;
169 /* command line option globals */
170 static char *relay_path;
171 static char *output_name;
/*
 * Action mask sent to the kernel in start_trace(); defaults to all bits set.
 * NOTE(review): initializing an int from ~0U relies on an
 * implementation-defined conversion; an unsigned type would avoid that.
 */
172 static int act_mask = ~0U;
173 static int kill_running_trace;
/* Sub-buffer size and count passed to the kernel and used to size the mmap. */
175 static int buf_size = BUF_SIZE;
176 static int buf_nr = BUF_NR;
/*
 * Reads the termination flag through a volatile lvalue.
 * NOTE(review): done is already declared volatile, so the cast is redundant.
 * If the flag is written from a signal handler (presumably handle_sigint --
 * confirm), volatile sig_atomic_t is the type the C standard guarantees.
 */
178 #define is_done() (*(volatile int *)(&done))
179 static volatile int done;
/* Mutex that start_threads() installs as fd_lock for threads writing to stdout. */
181 static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Forward declaration; called from code that appears before its definition. */
183 static void exit_trace(int status);
/*
 * Looks up an action name in mask_maps.
 *
 * string is compared case-insensitively against each entry's short and long
 * form; the mask of the first matching entry is returned.
 * NOTE(review): the no-match return value is not visible here -- the caller
 * in main() treats some result as "Invalid action mask"; confirm which.
 */
185 static int find_mask_map(char *string)
189 for (i = 0; i < sizeof(mask_maps)/sizeof(mask_maps[0]); i++)
190 if (COMPARE_MASK_MAP(&mask_maps[i], string))
191 return mask_maps[i].mask;
/*
 * Asks the kernel to start tracing on dip's device.
 *
 * Fills a zeroed blk_user_trace_setup with the global buf_size, buf_nr and
 * act_mask and issues BLKSTARTTRACE on dip->fd. An ioctl failure is reported
 * with perror(). Otherwise the name returned by the kernel is saved in
 * dip->buts_name and dip->trace_started is set.
 * start_devices() treats a non-zero return as failure.
 */
196 static int start_trace(struct device_information *dip)
198 struct blk_user_trace_setup buts;
200 memset(&buts, 0, sizeof(buts));
201 buts.buf_size = buf_size;
202 buts.buf_nr = buf_nr;
203 buts.act_mask = act_mask;
205 if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
206 perror("BLKSTARTTRACE");
/*
 * NOTE(review): copies sizeof(dip->buts_name) bytes, which assumes buts.name
 * is at least that large -- confirm the two arrays have matching sizes.
 */
210 memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
211 dip->trace_started = 1;
/*
 * Stops tracing on dip's device.
 *
 * BLKSTOPTRACE is issued when this process started the trace or when the
 * kill_running_trace option is set; an ioctl failure is reported with
 * perror() and otherwise ignored. dip->trace_started is cleared.
 */
215 static void stop_trace(struct device_information *dip)
217 if (dip->trace_started || kill_running_trace) {
218 if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
219 perror("BLKSTOPTRACE");
221 dip->trace_started = 0;
/*
 * Iterates over all ndevs entries of device_information.
 * NOTE(review): presumably calls stop_trace() on each device -- confirm.
 */
225 static void stop_all_traces(void)
227 struct device_information *dip;
230 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
/*
 * read(2)-based data fetch: tries to read len bytes from tip->fd.
 *
 * Keeps calling read() while tracing is not done and bytes remain. One
 * failure path logs "failed read"; a positive return that reaches the
 * else-if branch is logged as a misread together with the count and len.
 * get_data() passes this function's result straight through to its caller.
 */
234 static int get_data_read(struct thread_information *tip, void *buf, int len)
237 int ret, bytes_left = len;
239 while (!is_done() && bytes_left > 0) {
240 ret = read(tip->fd, p, bytes_left);
246 fprintf(stderr,"Thread %d failed read of %s\n",
249 } else if (ret > 0) {
250 fprintf(stderr,"Thread %d misread %s %d,%d\n",
251 tip->cpu, tip->fn, ret, len);
/*
 * mmap-based data fetch: copies len bytes from the mapped relay buffer at
 * tip->buf + tip->buf_offset into buf.
 *
 * If the request does not fit in what is left of the current sub-buffer,
 * the reader moves on and buf_offset is reset to the start of sub-buffer
 * buf_subbuf (buf_subbuf == buf_nr is special-cased -- presumably a wrap to
 * the first sub-buffer; confirm). A copied record is accepted when
 * CHECK_MAGIC passes and its sequence number is not lower than the last one
 * accepted; tip->sequence is then updated. buf_offset advances by len.
 */
264 static int get_data_mmap(struct thread_information *tip, void *buf, int len,
267 if (len > (buf_size * (tip->buf_subbuf + 1)) - tip->buf_offset) {
269 if (tip->buf_subbuf == buf_nr)
272 tip->buf_offset = tip->buf_subbuf * buf_size;
276 struct blk_io_trace *t = buf;
278 memcpy(buf, tip->buf + tip->buf_offset, len);
283 if (CHECK_MAGIC(t) && t->sequence >= tip->sequence) {
284 tip->sequence = t->sequence;
294 tip->buf_offset += len;
/*
 * Fetches len bytes for tip into buf via either get_data_mmap() or
 * get_data_read(). The check_magic argument is forwarded only to the mmap
 * variant. Callers treat a non-zero result as "no data".
 */
298 static int get_data(struct thread_information *tip, void *buf, int len,
302 return get_data_mmap(tip, buf, len, check_magic);
304 return get_data_read(tip, buf, len);
/*
 * Fetches nb bytes of record payload for tip through get_data() with magic
 * checking disabled (last argument 0).
 * NOTE(review): ofn is not referenced in the lines visible here -- confirm
 * whether the parameter is still needed.
 */
307 static void *extract_data(struct thread_information *tip, char *ofn, int nb)
312 if (!get_data(tip, buf, nb, 0))
/* Releases the output-descriptor mutex referenced by tip->fd_lock. */
320 static inline void tip_fd_unlock(struct thread_information *tip)
323 pthread_mutex_unlock(tip->fd_lock);
/* Acquires the output-descriptor mutex referenced by tip->fd_lock. */
326 static inline void tip_fd_lock(struct thread_information *tip)
329 pthread_mutex_lock(tip->fd_lock);
/*
 * Reader thread entry point (started by start_threads() via pthread_create).
 *
 * arg is the thread's struct thread_information. The thread restricts itself
 * to tip->cpu, opens the relay file <relay_path>/block/<buts_name>/trace<cpu>,
 * maps buf_size * buf_nr bytes of it, and then fetches trace headers and
 * their payloads, writing both to tip->ofd and counting each record in
 * tip->events_processed.
 */
332 static void *extract(void *arg)
334 struct thread_information *tip = arg;
336 char dp[64], *pdu_data;
337 struct blk_io_trace t;
/*
 * NOTE(review): the affinity call below uses getpid(). With NPTL on Linux
 * that ID names the thread-group leader, not this worker thread, so every
 * worker may be re-pinning the main thread. Passing 0 targets the calling
 * thread -- confirm against sched_setaffinity(2).
 */
338 pid_t pid = getpid();
342 CPU_SET((tip->cpu), &cpu_mask);
344 if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
345 perror("sched_setaffinity");
349 snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
350 relay_path, tip->device->buts_name, tip->cpu);
351 tip->fd = open(tip->fn, O_RDONLY);
354 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
/* Map all sub-buffers read-only; MAP_POPULATE pre-faults the mapping. */
360 tip->buf = mmap(NULL, buf_size * buf_nr, PROT_READ,
361 MAP_PRIVATE | MAP_POPULATE, tip->fd, 0);
362 if (tip->buf == MAP_FAILED) {
/* Fetch one trace header, with magic checking enabled. */
370 if (get_data(tip, &t, sizeof(t), 1))
373 if (verify_trace(&t))
381 pdu_data = extract_data(tip, dp, pdu_len);
384 * now we have both trace and payload, get a lock on the
385 * output descriptor and send it off
389 ret = write(tip->ofd, &t, sizeof(t));
391 fprintf(stderr,"Thread %d failed write\n", tip->cpu);
/*
 * NOTE(review): a short but successful write also takes the error branch
 * below, where perror() would print whatever errno happened to hold.
 */
397 ret = write(tip->ofd, pdu_data, pdu_len);
398 if (ret != pdu_len) {
399 perror("write pdu data");
409 tip->events_processed++;
/*
 * Prepares and launches one reader thread per CPU for device dip.
 *
 * Each thread's event counter is reset and its output descriptor chosen:
 * either a dup() of stdout guarded by stdout_mutex, or a freshly created
 * and truncated file (mode 0644) named "<output_name>.blktrace.<n>" or
 * "<buts_name>.blktrace.<cpu>". The thread then starts running extract().
 * A pthread_create() failure is reported with perror(); start_devices()
 * treats a non-zero return as failure.
 */
415 static int start_threads(struct device_information *dip)
417 struct thread_information *tip;
/* pipeline is true when the user asked for output on stdout ("-o -"). */
419 int j, pipeline = output_name && !strcmp(output_name, "-");
421 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
425 tip->events_processed = 0;
428 tip->ofd = dup(STDOUT_FILENO);
429 tip->fd_lock = &stdout_mutex;
/*
 * NOTE(review): sprintf() into op is unbounded and output_name comes from
 * the command line; snprintf(op, sizeof(op), ...) would prevent an overflow
 * -- confirm op's declared size.
 */
432 sprintf(op, "%s.blktrace.%d", output_name,
435 sprintf(op, "%s.blktrace.%d",
436 dip->buts_name, tip->cpu);
438 tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644);
446 if (pthread_create(&tip->thread, NULL, extract, tip)) {
447 perror("pthread_create");
/*
 * Releases a reader thread's resources: unmaps the buf_size * buf_nr byte
 * relay mapping and marks both descriptors (fd, ofd) as unused with -1.
 */
456 static void close_thread(struct thread_information *tip)
459 munmap(tip->buf, buf_size * buf_nr);
466 tip->fd = tip->ofd = -1;
/*
 * Waits for all ncpus reader threads of device dip with pthread_join();
 * a join failure is reported with perror().
 */
469 static void stop_threads(struct device_information *dip)
471 struct thread_information *tip;
475 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
/*
 * NOTE(review): pthread_join() stores a void * through its second argument.
 * &ret is cast to void * here, so ret must be pointer-sized -- confirm its
 * declared type.
 */
476 if (pthread_join(tip->thread, (void *) &ret))
477 perror("thread_join");
/*
 * Iterates over all ndevs entries of device_information.
 * NOTE(review): presumably calls stop_threads() on each device -- confirm.
 */
482 static void stop_all_threads(void)
484 struct device_information *dip;
487 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
/*
 * Cleanup routine registered with atexit() in main(). Visits every thread
 * of every device (ndevs devices, ncpus threads each).
 * NOTE(review): presumably closes each thread and stops each trace --
 * confirm.
 */
491 static void stop_all_tracing(void)
493 struct device_information *dip;
494 struct thread_information *tip;
497 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
498 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++)
504 static void exit_trace(int status)
/*
 * Grows device_information by one entry and records path in the new slot.
 *
 * path is stored as given, not copied, so it must outlive the array (main()
 * passes optarg / argv entries). An allocation failure is reported on
 * stderr; callers treat a non-zero return as failure.
 */
510 static int resize_devices(char *path)
512 int size = (ndevs + 1) * sizeof(struct device_information);
/*
 * NOTE(review): the realloc() result overwrites the only pointer to the old
 * array, so on failure the existing entries are lost. Assigning to a
 * temporary first would keep them reachable.
 */
514 device_information = realloc(device_information, size);
515 if (!device_information) {
516 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
519 device_information[ndevs].path = path;
/*
 * Opens every registered device read-only and stores the descriptor in
 * dip->fd. main() treats a non-zero return as failure.
 */
524 static int open_devices(void)
526 struct device_information *dip;
529 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
530 dip->fd = open(dip->path, O_RDONLY);
/*
 * Starts tracing and reader threads for every registered device.
 *
 * Allocates one thread_information per CPU per device, then in a first pass
 * starts the kernel trace on each device and in a second pass gives each
 * device its ncpus-sized slice of the array and starts its threads. When a
 * pass fails part-way, the devices handled so far (indices below i) are
 * revisited for cleanup. main() treats a non-zero return as failure.
 */
539 static int start_devices(void)
541 struct device_information *dip;
544 size = ncpus * sizeof(struct thread_information);
545 thread_information = malloc(size * ndevs);
546 if (!thread_information) {
547 fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
/* Pass 1: start the kernel-side trace on each device. */
551 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
552 if (start_trace(dip)) {
554 fprintf(stderr, "Failed to start trace on %s\n",
/* Roll back the devices whose trace was already started. */
560 for (dip = device_information, j = 0; j < i; j++, dip++)
/* Pass 2: assign each device its thread slice and launch the readers. */
565 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
566 dip->threads = thread_information + (i * ncpus);
567 if (start_threads(dip)) {
568 fprintf(stderr, "Failed to start worker threads\n");
573 for (dip = device_information, j = 0; j < i; j++, dip++)
575 for (dip = device_information, i = 0; i < ndevs; i++, dip++)
/*
 * Prints, for each device, the number of events handled by every CPU's
 * reader thread followed by the device total.
 */
583 static void show_stats(void)
586 struct device_information *dip;
587 struct thread_information *tip;
588 unsigned long long events_processed;
/*
 * Special case for trace output on stdout ("-").
 * NOTE(review): presumably returns early so that statistics are not mixed
 * into the trace stream -- confirm.
 */
590 if (output_name && !strcmp(output_name, "-"))
593 for (dip = device_information, i = 0; i < ndevs; i++, dip++) {
594 printf("Device: %s\n", dip->path);
595 events_processed = 0;
596 for (tip = dip->threads, j = 0; j < ncpus; j++, tip++) {
/*
 * NOTE(review): tip->events_processed is unsigned long but is printed with
 * %ld, and the unsigned long long total below uses %lld. %lu / %llu would
 * match the argument types.
 */
597 printf(" CPU%3d: %20ld events\n",
598 tip->cpu, tip->events_processed);
599 events_processed += tip->events_processed;
601 printf(" Total: %20lld events\n", events_processed);
/*
 * Help text appended to the usage banner by show_usage().
 * NOTE(review): the synopsis on the first two lines does not mention -b and
 * -n, although both are described in the option list below.
 */
605 static char usage_str[] = \
606 "-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
607 "[ -a action ] [ -A action mask ] [ -v ]\n\n" \
608 "\t-d Use specified device. May also be given last after options\n" \
609 "\t-r Path to mounted relayfs, defaults to /relay\n" \
610 "\t-o File(s) to send output to\n" \
611 "\t-k Kill a running trace\n" \
612 "\t-w Stop after defined time, in seconds\n" \
613 "\t-a Only trace specified actions. See documentation\n" \
614 "\t-A Give trace mask as a single value. See documentation\n" \
615 "\t-b Sub buffer size in KiB\n" \
616 "\t-n Number of sub buffers\n" \
617 "\t-v Print program version info\n\n";
/*
 * Writes the usage banner to stderr: "Usage: ", the program name, the
 * blktrace version, and the option summary in usage_str.
 */
619 static void show_usage(char *program)
621 fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
624 static void handle_sigint(int sig)
629 int main(int argc, char *argv[])
631 static char default_relay_path[] = "/relay";
635 int act_mask_tmp = 0;
637 while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
640 i = find_mask_map(optarg);
642 fprintf(stderr,"Invalid action mask %s\n",
650 if ((sscanf(optarg, "%x", &i) != 1) || !VALID_SET(i)) {
652 "Invalid set action mask %s/0x%x\n",
660 if (resize_devices(optarg) != 0)
669 output_name = optarg;
672 kill_running_trace = 1;
675 stop_watch = atoi(optarg);
676 if (stop_watch <= 0) {
678 "Invalid stopwatch value (%d secs)\n",
684 printf("%s version %s\n", argv[0], blktrace_version);
687 buf_size = atoi(optarg);
690 "Invalid buffer size (%d)\n", buf_size);
696 buf_nr = atoi(optarg);
699 "Invalid buffer nr (%d)\n", buf_nr);
709 while (optind < argc) {
710 if (resize_devices(argv[optind++]) != 0)
720 relay_path = default_relay_path;
722 if (act_mask_tmp != 0)
723 act_mask = act_mask_tmp;
725 if (statfs(relay_path, &st) < 0) {
727 fprintf(stderr,"%s does not appear to be a valid path\n",
730 } else if (st.f_type != RELAYFS_TYPE) {
731 fprintf(stderr,"%s does not appear to be a relay filesystem\n",
736 if (open_devices() != 0)
739 if (kill_running_trace) {
744 setlocale(LC_NUMERIC, "en_US");
746 ncpus = sysconf(_SC_NPROCESSORS_ONLN);
748 fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
752 if (start_devices() != 0)
755 signal(SIGINT, handle_sigint);
756 signal(SIGHUP, handle_sigint);
757 signal(SIGTERM, handle_sigint);
758 signal(SIGALRM, handle_sigint);
760 atexit(stop_all_tracing);