From d5396421d8e54ccdd14f9e35c4bfd50864c61397 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Sep 2005 13:09:45 +0200 Subject: [PATCH] [PATCH] Support for live tracing This patch adds support for doing live traces again, by allowing pipe of data from blktrace to blkparse. A structural change was needed in blk_io_trace to pass the CPU number info directly instead of inferring it from the filename. To parse realtime: $ blktrace -d /dev/xxx -o - | blkparse -i - --- Makefile | 2 +- blkparse.c | 451 ++++++++++++------ blktrace.c | 98 ++-- blktrace.h | 2 +- blktrace_api.h | 3 +- ...-2.6.13-mm1-C0 => blk-trace-2.6.13-mm1-D0} | 100 ++-- 6 files changed, 431 insertions(+), 225 deletions(-) rename kernel/{blk-trace-2.6.13-mm1-C0 => blk-trace-2.6.13-mm1-D0} (86%) diff --git a/Makefile b/Makefile index 9dbd08d..3233129 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC = gcc -CFLAGS = -Wall -O2 -D_GNU_SOURCE +CFLAGS = -Wall -O2 -g -D_GNU_SOURCE PROG = blkparse blktrace LIBS = -lpthread diff --git a/blkparse.c b/blkparse.c index 20bdc03..39e1fbc 100644 --- a/blkparse.c +++ b/blkparse.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "blktrace.h" #include "rbtree.h" @@ -34,9 +35,10 @@ #define SECONDS(x) ((unsigned long long)(x) / 1000000000) #define NANO_SECONDS(x) ((unsigned long long)(x) % 1000000000) -int backwards; -unsigned long long genesis_time, last_reported_time; -struct per_file_info { +static int backwards; +static unsigned long long genesis_time, last_reported_time; + +struct per_cpu_info { int cpu; int nelems; @@ -52,20 +54,43 @@ struct per_file_info { unsigned long long iread_kb, iwrite_kb; }; +#define S_OPTS "i:o:" +static struct option l_opts[] = { + { + .name = "input", + .has_arg = 1, + .flag = NULL, + .val = 'i' + }, + { + .name = "output", + .has_arg = 1, + .flag = NULL, + .val = 'o' + }, + { + .name = NULL, + .has_arg = 0, + .flag = NULL, + .val = 0 + } +}; + static struct rb_root rb_root; struct trace { struct blk_io_trace *bit; - unsigned int cpu; struct rb_node rb_node; }; -static struct per_file_info per_file_info[MAX_CPUS]; -static struct per_file_info *cur_file; +static struct per_cpu_info per_cpu_info[MAX_CPUS]; static unsigned long long events; static int max_cpus; +static int nfiles; + +static char *dev, *output_name; static inline void check_time(struct blk_io_trace *bit) { @@ -76,64 +101,67 @@ static inline void check_time(struct blk_io_trace *bit) last_reported_time = this; } -static inline void account_m(struct per_file_info *pfi, int rw, +static inline void account_m(struct per_cpu_info *pci, int rw, unsigned int bytes) { if (rw) { - pfi->mwrites++; - pfi->qwrite_kb += bytes >> 10; + pci->mwrites++; + pci->qwrite_kb += bytes >> 10; } else { - pfi->mreads++; - pfi->qread_kb += bytes >> 10; + pci->mreads++; + pci->qread_kb += bytes >> 10; } } -static inline void account_q(struct per_file_info *pfi, int rw, +static inline void account_q(struct per_cpu_info *pci, int rw, unsigned int bytes) { if (rw) { - pfi->qwrites++; - pfi->qwrite_kb += bytes >> 10; + pci->qwrites++; + pci->qwrite_kb += bytes >> 10; } else { - pfi->qreads++; - pfi->qread_kb += bytes >> 10; + pci->qreads++; + pci->qread_kb += bytes >> 10; } } -static inline void account_c(struct per_file_info *pfi, int rw, +static inline void account_c(struct per_cpu_info *pci, int rw, unsigned int bytes) { if (rw) { - pfi->cwrites++; - pfi->cwrite_kb += bytes >> 10; + pci->cwrites++; + pci->cwrite_kb += bytes >> 10; } else { - pfi->creads++; - pfi->cread_kb += bytes >> 10; + pci->creads++; + pci->cread_kb += bytes >> 10; } } -static inline void account_i(struct per_file_info *pfi, int rw, +static inline void account_i(struct per_cpu_info *pci, int rw, unsigned int bytes) { if (rw) { - pfi->iwrites++; - pfi->iwrite_kb += bytes >> 10; + pci->iwrites++; + pci->iwrite_kb += bytes >> 10; } else { - pfi->ireads++; - pfi->iread_kb += bytes >> 10; + pci->ireads++; + pci->iread_kb += bytes >> 10; } } -static void output(char *s) +static void output(struct per_cpu_info *pci, char *s) { printf("%s", s); - fprintf(cur_file->ofp,"%s",s); + + if (pci->ofp) + fprintf(pci->ofp,"%s",s); } static char hstring[256]; static char tstring[256]; -static inline char *setup_header(struct blk_io_trace *t, char act) +static inline char *setup_header(struct per_cpu_info *pci, + struct blk_io_trace *t, char act) { int w = t->action & BLK_TC_ACT(BLK_TC_WRITE); int b = t->action & BLK_TC_ACT(BLK_TC_BARRIER); @@ -153,93 +181,98 @@ static inline char *setup_header(struct blk_io_trace *t, char act) rwbs[i] = '\0'; sprintf(hstring, "%c %3d %15ld %5Lu.%09Lu %5u %c %3s", backwards, - cur_file->cpu, + pci->cpu, (unsigned long)t->sequence, SECONDS(t->time), NANO_SECONDS(t->time), t->pid, act, rwbs); return hstring; } -static void log_complete(struct blk_io_trace *t, char act) +static void log_complete(struct per_cpu_info *pci, struct blk_io_trace *t, + char act) { - sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(t, act), + sprintf(tstring,"%s %Lu + %u [%d]\n", setup_header(pci, t, act), (unsigned long long)t->sector, t->bytes >> 9, t->error); - output(tstring); + output(pci, tstring); } -static void log_queue(struct blk_io_trace *t, char act) +static void log_queue(struct per_cpu_info *pci, struct blk_io_trace *t, + char act) { - sprintf(tstring,"%s %Lu + %u\n", setup_header(t, act), + sprintf(tstring,"%s %Lu + %u\n", setup_header(pci, t, act), (unsigned long long)t->sector, t->bytes >> 9); - output(tstring); + output(pci, tstring); } -static void log_issue(struct blk_io_trace *t, char act) +static void log_issue(struct per_cpu_info *pci, struct blk_io_trace *t, + char act) { - sprintf(tstring,"%s %Lu + %u\n", setup_header(t, act), + sprintf(tstring,"%s %Lu + %u\n", setup_header(pci, t, act), (unsigned long long)t->sector, t->bytes >> 9); - output(tstring); + output(pci, tstring); } -static void log_merge(struct blk_io_trace *t, char act) +static void log_merge(struct per_cpu_info *pci, struct blk_io_trace *t, + char act) { - sprintf(tstring,"%s %Lu + %u\n", setup_header(t, act), + sprintf(tstring,"%s %Lu + %u\n", setup_header(pci, t, act), (unsigned long long)t->sector, t->bytes >> 9); - output(tstring); + output(pci, tstring); } -static void log_generic(struct blk_io_trace *t, char act) +static void log_generic(struct per_cpu_info *pci, struct blk_io_trace *t, + char act) { - sprintf(tstring,"%s %Lu + %u\n", setup_header(t, act), + sprintf(tstring,"%s %Lu + %u\n", setup_header(pci, t, act), (unsigned long long)t->sector, t->bytes >> 9); - output(tstring); + output(pci, tstring); } -static int log_pc(struct blk_io_trace *t, char act) +static int log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char act) { unsigned char *buf; int i; - sprintf(tstring,"%s ", setup_header(t, act)); - output(tstring); + sprintf(tstring,"%s ", setup_header(pci, t, act)); + output(pci, tstring); buf = (unsigned char *) t + sizeof(*t); for (i = 0; i < t->pdu_len; i++) { sprintf(tstring,"%02x ", buf[i]); - output(tstring); + output(pci, tstring); } if (act == 'C') { sprintf(tstring,"[%d]", t->error); - output(tstring); + output(pci, tstring); } printf("\n"); return 0; } -static int dump_trace_pc(struct blk_io_trace *t, struct per_file_info *pfi) +static int dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci) { int ret = 0; switch (t->action & 0xffff) { case __BLK_TA_QUEUE: - log_generic(t, 'Q'); + log_generic(pci, t, 'Q'); break; case __BLK_TA_GETRQ: - log_generic(t, 'G'); + log_generic(pci, t, 'G'); break; case __BLK_TA_SLEEPRQ: - log_generic(t, 'S'); + log_generic(pci, t, 'S'); break; case __BLK_TA_REQUEUE: - log_generic(t, 'R'); + log_generic(pci, t, 'R'); break; case __BLK_TA_ISSUE: - ret = log_pc(t, 'D'); + ret = log_pc(pci, t, 'D'); break; case __BLK_TA_COMPLETE: - log_pc(t, 'C'); + log_pc(pci, t, 'C'); break; default: fprintf(stderr, "Bad pc action %x\n", t->action); @@ -250,40 +283,40 @@ static int dump_trace_pc(struct blk_io_trace *t, struct per_file_info *pfi) return ret; } -static void dump_trace_fs(struct blk_io_trace *t, struct per_file_info *pfi) +static void dump_trace_fs(struct blk_io_trace *t, struct per_cpu_info *pci) { int w = t->action & BLK_TC_ACT(BLK_TC_WRITE); switch (t->action & 0xffff) { case __BLK_TA_QUEUE: - account_q(pfi, w, t->bytes); - log_queue(t, 'Q'); + account_q(pci, w, t->bytes); + log_queue(pci, t, 'Q'); break; case __BLK_TA_BACKMERGE: - account_m(pfi, w, t->bytes); - log_merge(t, 'M'); + account_m(pci, w, t->bytes); + log_merge(pci, t, 'M'); break; case __BLK_TA_FRONTMERGE: - account_m(pfi, w, t->bytes); - log_merge(t, 'F'); + account_m(pci, w, t->bytes); + log_merge(pci, t, 'F'); break; case __BLK_TA_GETRQ: - log_generic(t, 'G'); + log_generic(pci, t, 'G'); break; case __BLK_TA_SLEEPRQ: - log_generic(t, 'S'); + log_generic(pci, t, 'S'); break; case __BLK_TA_REQUEUE: - account_c(pfi, w, -t->bytes); - log_queue(t, 'R'); + account_c(pci, w, -t->bytes); + log_queue(pci, t, 'R'); break; case __BLK_TA_ISSUE: - account_i(pfi, w, t->bytes); - log_issue(t, 'D'); + account_i(pci, w, t->bytes); + log_issue(pci, t, 'D'); break; case __BLK_TA_COMPLETE: - account_c(pfi, w, t->bytes); - log_complete(t, 'C'); + account_c(pci, w, t->bytes); + log_complete(pci, t, 'C'); break; default: fprintf(stderr, "Bad fs action %x\n", t->action); @@ -291,65 +324,65 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_file_info *pfi) } } -static int dump_trace(struct blk_io_trace *t, struct per_file_info *pfi) +static int dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci) { int ret = 0; if (t->action & BLK_TC_ACT(BLK_TC_PC)) - ret = dump_trace_pc(t, pfi); + ret = dump_trace_pc(t, pci); else - dump_trace_fs(t, pfi); + dump_trace_fs(t, pci); events++; return ret; } -static void dump_pfi_stats(struct per_file_info *pfi) +static void dump_pci_stats(struct per_cpu_info *pci) { printf("\tReads:\n"); - printf("\t\tQueued: %'8lu, %'8LuKiB\n", pfi->qreads, pfi->qread_kb); - printf("\t\tDispatched %'8lu, %'8LuKiB\n", pfi->ireads, pfi->iread_kb); - printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pfi->creads, pfi->cread_kb); - printf("\t\tMerges: %'8lu\n", pfi->mreads); + printf("\t\tQueued: %'8lu, %'8LuKiB\n", pci->qreads, pci->qread_kb); + printf("\t\tDispatched %'8lu, %'8LuKiB\n", pci->ireads, pci->iread_kb); + printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pci->creads, pci->cread_kb); + printf("\t\tMerges: %'8lu\n", pci->mreads); printf("\tWrites:\n"); - printf("\t\tQueued: %'8lu, %'8LuKiB\n", pfi->qwrites,pfi->qwrite_kb); - printf("\t\tDispatched %'8lu, %'8LuKiB\n", pfi->iwrites,pfi->iwrite_kb); - printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pfi->cwrites,pfi->cwrite_kb); - printf("\t\tMerges: %'8lu\n", pfi->mwrites); + printf("\t\tQueued: %'8lu, %'8LuKiB\n", pci->qwrites,pci->qwrite_kb); + printf("\t\tDispatched %'8lu, %'8LuKiB\n", pci->iwrites,pci->iwrite_kb); + printf("\t\tCompleted: %'8lu, %'8LuKiB\n", pci->cwrites,pci->cwrite_kb); + printf("\t\tMerges: %'8lu\n", pci->mwrites); } -static void show_stats(int nfiles) +static void show_stats(void) { - struct per_file_info foo, *pfi; + struct per_cpu_info foo, *pci; int i; memset(&foo, 0, sizeof(foo)); for (i = 0; i < nfiles; i++) { - pfi = &per_file_info[i]; + pci = &per_cpu_info[i]; - if (!pfi->nelems) + if (!pci->nelems) continue; - foo.qreads += pfi->qreads; - foo.qwrites += pfi->qwrites; - foo.creads += pfi->creads; - foo.cwrites += pfi->cwrites; - foo.mreads += pfi->mreads; - foo.mwrites += pfi->mwrites; - foo.qread_kb += pfi->qread_kb; - foo.qwrite_kb += pfi->qwrite_kb; - foo.cread_kb += pfi->cread_kb; - foo.cwrite_kb += pfi->cwrite_kb; + foo.qreads += pci->qreads; + foo.qwrites += pci->qwrites; + foo.creads += pci->creads; + foo.cwrites += pci->cwrites; + foo.mreads += pci->mreads; + foo.mwrites += pci->mwrites; + foo.qread_kb += pci->qread_kb; + foo.qwrite_kb += pci->qwrite_kb; + foo.cread_kb += pci->cread_kb; + foo.cwrite_kb += pci->cwrite_kb; printf("CPU%d:\n", i); - dump_pfi_stats(pfi); + dump_pci_stats(pci); } if (nfiles > 1) { printf("Total:\n"); - dump_pfi_stats(&foo); + dump_pci_stats(&foo); } printf("Events: %'Lu\n", events); @@ -392,7 +425,6 @@ static int sort_entries(void *traces, unsigned long offset, int cpu) t = malloc(sizeof(*t)); t->bit = bit; - t->cpu = cpu; memset(&t->rb_node, 0, sizeof(t->rb_node)); trace_to_cpu(bit); @@ -410,7 +442,7 @@ static int sort_entries(void *traces, unsigned long offset, int cpu) return nelems; } -static void show_entries(void) +static void show_entries_rb(void) { struct blk_io_trace *bit; struct rb_node *n; @@ -425,72 +457,66 @@ static void show_entries(void) t = rb_entry(n, struct trace, rb_node); bit = t->bit; - cpu = t->cpu; + cpu = bit->cpu; if (cpu > max_cpus) { fprintf(stderr, "CPU number too large (%d)\n", cpu); break; } - cur_file = &per_file_info[cpu]; - if (genesis_time == 0) genesis_time = bit->time; bit->time -= genesis_time; check_time(bit); - if (dump_trace(bit, cur_file)) + if (dump_trace(bit, &per_cpu_info[cpu])) break; } while ((n = rb_next(n)) != NULL); } -int main(int argc, char *argv[]) +static int do_file(void) { - int i, nfiles, ret; - char *dev; - - if (argc != 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - return 1; - } - - dev = argv[1]; + int i, ret; memset(&rb_root, 0, sizeof(rb_root)); - memset(per_file_info, 0, sizeof(per_file_info)); - for (max_cpus = 0, i = 0, nfiles = 0; i < MAX_CPUS; i++, nfiles++, max_cpus++) { - struct per_file_info *pfi = &per_file_info[i]; + for (max_cpus = 0, i = 0; i < MAX_CPUS; i++, nfiles++, max_cpus++) { + struct per_cpu_info *pci = &per_cpu_info[i]; struct stat st; void *tb; - pfi->cpu = i; + pci->cpu = i; + pci->ofp = NULL; - snprintf(pfi->fname, sizeof(pfi->fname)-1,"%s_out.%d", dev, i); - if (stat(pfi->fname, &st) < 0) + snprintf(pci->fname, sizeof(pci->fname)-1,"%s_out.%d", dev, i); + if (stat(pci->fname, &st) < 0) break; if (!st.st_size) continue; - snprintf(pfi->ofname, sizeof(pfi->ofname)-1, "%s_log.%d", dev, i); - pfi->ofp = fopen(pfi->ofname, "w"); - if (pfi->ofp == NULL) { - perror(pfi->ofname); - break; + if (output_name) { + snprintf(pci->ofname, sizeof(pci->ofname) - 1, + "%s_log.%d", output_name, i); + + pci->ofp = fopen(pci->ofname, "w"); + if (pci->ofp == NULL) { + perror(pci->ofname); + break; + } } - printf("Processing %s\n", pfi->fname); + printf("Processing %s\n", pci->fname); tb = malloc(st.st_size); - pfi->fd = open(pfi->fname, O_RDONLY); - if (pfi->fd < 0) { - perror(pfi->fname); + pci->fd = open(pci->fname, O_RDONLY); + if (pci->fd < 0) { + perror(pci->fname); break; } - if (read(pfi->fd, tb, st.st_size) != st.st_size) { + if (read(pci->fd, tb, st.st_size) != st.st_size) { fprintf(stderr, "error reading\n"); break; } @@ -499,18 +525,161 @@ int main(int argc, char *argv[]) if (ret == -1) break; - close(pfi->fd); - pfi->nelems = ret; - printf("\t%2d %10s %15d\n", i, pfi->fname, pfi->nelems); + close(pci->fd); + pci->nelems = ret; + printf("\t%2d %10s %15d\n", i, pci->fname, pci->nelems); + + } + + if (!nfiles) { + fprintf(stderr, "No files found\n"); + return 1; + } + + show_entries_rb(); + show_stats(); + return 0; +} + +static int read_data(int fd, void *buffer, int bytes) +{ + int ret, bytes_left; + void *p; + + bytes_left = bytes; + p = buffer; + while (bytes_left > 0) { + ret = read(fd, p, bytes_left); + if (!ret) + return 1; + else if (ret < 0) { + perror("read"); + return 1; + } else { + p += ret; + bytes_left -= ret; + } + } + + return 0; +} + +static int do_stdin(void) +{ + struct blk_io_trace *bit; + int ptr_len, fd; + void *ptr; + + nfiles = 1; + + fd = dup(0); + ptr_len = sizeof(*bit); + ptr = malloc(ptr_len); + bit = ptr; + do { + struct per_cpu_info *pci; + int cpu; + + if (read_data(fd, bit, sizeof(*bit))) + break; + + trace_to_cpu(bit); + + if (bit->pdu_len) { + struct blk_io_trace t; + + if (ptr_len < bit->pdu_len + sizeof(*bit)) { + memcpy(&t, bit, sizeof(t)); + if (ptr) + free(ptr); + + ptr_len = bit->pdu_len + sizeof(*bit); + ptr = malloc(ptr_len); + bit = ptr; + memcpy(ptr, &t, sizeof(t)); + } + if (read_data(fd, ptr + sizeof(*bit), bit->pdu_len)) + break; + } + + if (verify_trace(bit)) + break; + + cpu = bit->cpu; + if (cpu > max_cpus) { + fprintf(stderr, "CPU number too large (%d)\n", cpu); + break; + } + + pci = &per_cpu_info[cpu]; + pci->nelems++; + + if (output_name && !pci->ofp) { + snprintf(pci->ofname, sizeof(pci->ofname) - 1, + "%s_log.%d", output_name, cpu); + + pci->ofp = fopen(pci->ofname, "w"); + if (pci->ofp == NULL) { + perror(pci->ofname); + break; + } + } + + if (genesis_time == 0) + genesis_time = bit->time; + bit->time -= genesis_time; + + check_time(bit); + + if (dump_trace(bit, pci)) + break; + + } while (1); + + close(fd); + free(ptr); + show_stats(); + return 0; +} +int main(int argc, char *argv[]) +{ + int c, ret, i; + + while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) { + switch (c) { + case 'i': + dev = strdup(optarg); + break; + case 'o': + output_name = strdup(optarg); + break; + default: + fprintf(stderr, "Usage: %s -i \n", argv[0]); + return 1; + } } - if (nfiles) { - show_entries(); - show_stats(nfiles); - return 0; + if (!dev) { + fprintf(stderr, "Usage: %s -i \n", argv[0]); + return 1; + } + + memset(per_cpu_info, 0, sizeof(per_cpu_info)); + + if (!strcmp(dev, "-")) + ret = do_stdin(); + else + ret = do_file(); + + for (i = 0; i < MAX_CPUS; i++) { + struct per_cpu_info *pci = &per_cpu_info[i]; + + if (pci->ofp) { + fflush(pci->ofp); + fclose(pci->ofp); + } } - fprintf(stderr, "No files found\n"); - return 1; + return ret; } diff --git a/blktrace.c b/blktrace.c index 38549f0..c7d5d24 100644 --- a/blktrace.c +++ b/blktrace.c @@ -65,8 +65,8 @@ struct mask_map mask_maps[] = { DECLARE_MASK_MAP(PC), }; -#define S_OPTS "d:a:A:r:" -struct option l_opts[] = { +#define S_OPTS "d:a:A:r:o:" +static struct option l_opts[] = { { .name = "dev", .has_arg = 1, @@ -91,6 +91,12 @@ struct option l_opts[] = { .flag = NULL, .val = 'r' }, + { + .name = "output", + .has_arg = 1, + .flag = NULL, + .val = 'o' + }, { .name = NULL, .has_arg = 0, @@ -106,6 +112,9 @@ struct thread_information { int fd; char fn[MAXPATHLEN + 64]; + pthread_mutex_t *fd_lock; + int ofd; + unsigned long events_processed; }; @@ -118,9 +127,12 @@ static int devfd, ncpus; static struct thread_information *thread_information; static char *buts_name_p; static char *dev; +static char *output_name; static int act_mask = ~0U; static int trace_started; +static pthread_mutex_t stdout_mutex = PTHREAD_MUTEX_INITIALIZER; + inline int compare_mask_map(struct mask_map *mmp, char *string) { int i, result; @@ -161,7 +173,6 @@ static int start_trace(char *dev) buts.buf_nr = BUF_NR; buts.act_mask = act_mask; - printf("Starting trace on %s\n", dev); if (ioctl(devfd, BLKSTARTTRACE, &buts) < 0) { perror("BLKSTARTTRACE"); return 1; @@ -183,8 +194,7 @@ static void stop_trace(void) } } -static void extract_data(struct thread_information *tip, - char *ofn, int ofd, int nb) +static void extract_data(struct thread_information *tip, char *ofn, int nb) { int ret, bytes_left; unsigned char *buf, *p; @@ -208,7 +218,7 @@ static void extract_data(struct thread_information *tip, } } - ret = write(ofd, buf, nb); + ret = write(tip->ofd, buf, nb); if (ret != nb) { perror(ofn); fprintf(stderr,"Thread %d extract_data %s failed\n", tip->cpu, ofn); @@ -219,11 +229,23 @@ static void extract_data(struct thread_information *tip, free(buf); } +static inline void tip_fd_unlock(struct thread_information *tip) +{ + if (tip->fd_lock) + pthread_mutex_unlock(tip->fd_lock); +} + +static inline void tip_fd_lock(struct thread_information *tip) +{ + if (tip->fd_lock) + pthread_mutex_lock(tip->fd_lock); +} + static void *extract(void *arg) { struct thread_information *tip = arg; - int ret, ofd, pdu_len; - char op[64], dp[64]; + int ret, pdu_len; + char dp[64]; struct blk_io_trace t; pid_t pid = getpid(); cpu_set_t cpu_mask; @@ -236,14 +258,6 @@ static void *extract(void *arg) exit(1); } - sprintf(op, "%s_out.%d", buts_name_p, tip->cpu); - ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644); - if (ofd < 0) { - perror(op); - fprintf(stderr,"Thread %d failed creat of %s\n", tip->cpu, op); - exit(1); - } - snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d", relay_path, buts_name_p, tip->cpu); tip->fd = open(tip->fn, O_RDONLY); @@ -279,16 +293,19 @@ static void *extract(void *arg) trace_to_be(&t); - ret = write(ofd, &t, sizeof(t)); + tip_fd_lock(tip); + + ret = write(tip->ofd, &t, sizeof(t)); if (ret < 0) { - perror(op); - fprintf(stderr,"Thread %d failed write of %s\n", - tip->cpu, op); + fprintf(stderr,"Thread %d failed write\n", tip->cpu); + tip_fd_unlock(tip); exit(1); } if (pdu_len) - extract_data(tip, dp, ofd, pdu_len); + extract_data(tip, dp, pdu_len); + + tip_fd_unlock(tip); tip->events_processed++; } @@ -299,6 +316,7 @@ static void *extract(void *arg) static int start_threads(void) { struct thread_information *tip; + char op[64]; int i; ncpus = sysconf(_SC_NPROCESSORS_ONLN); @@ -306,13 +324,26 @@ static int start_threads(void) fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n"); return 1; } - printf("Processors online: %d\n", ncpus); thread_information = malloc(ncpus * sizeof(struct thread_information)); for (i = 0, tip = thread_information; i < ncpus; i++, tip++) { + tip->fd_lock = NULL; tip->cpu = i; tip->events_processed = 0; + if (!strcmp(output_name, "-")) { + tip->ofd = dup(1); + tip->fd_lock = &stdout_mutex; + } else { + sprintf(op, "%s_out.%d", output_name, tip->cpu); + tip->ofd = open(op, O_CREAT|O_TRUNC|O_WRONLY, 0644); + } + + if (tip->ofd < 0) { + perror(op); + return 1; + } + if (pthread_create(&tip->thread, NULL, extract, tip)) { perror( "pthread_create"); return 0; @@ -332,7 +363,9 @@ static void stop_threads(void) if (pthread_join(tip->thread, (void *) &ret)) perror("thread_join"); + close(tip->fd); + close(tip->ofd); } } @@ -342,6 +375,9 @@ void show_stats(void) struct thread_information *tip; unsigned long events_processed = 0; + if (!strcmp(output_name, "-")) + return; + for (i = 0, tip = thread_information; i < ncpus; i++, tip++) { printf("CPU%3d: %20ld events\n", tip->cpu, tip->events_processed); @@ -353,7 +389,6 @@ void show_stats(void) void handle_sigint(int sig) { - printf("exiting on signal %d\n", sig); done = 1; } @@ -394,6 +429,10 @@ int main(int argc, char *argv[]) relay_path = optarg; break; + case 'o': + output_name = strdup(optarg); + break; + default: fprintf(stderr,"Usage: %s -d " "[-a [-a ]]\n", argv[0]); @@ -410,14 +449,8 @@ int main(int argc, char *argv[]) if (!relay_path) relay_path = default_relay_path; - if (act_mask_tmp != 0) { + if (act_mask_tmp != 0) act_mask = act_mask_tmp; - printf("Tracing 0x%04x: ", act_mask); - for (i = 0; i < BLK_TC_SHIFT; i++) - if (act_mask & (1 << i)) - printf("%s ", mask_maps[i].short_form); - printf("\n"); - } if (stat(relay_path, &st) < 0) { fprintf(stderr,"%s does not appear to be mounted\n", @@ -433,6 +466,9 @@ int main(int argc, char *argv[]) setlocale(LC_NUMERIC, "en_US"); + if (!output_name) + output_name = strdup(buts_name_p); + i = start_threads(); if (!i) { fprintf(stderr, "Failed to start worker threads\n"); @@ -440,8 +476,6 @@ int main(int argc, char *argv[]) return 4; } - printf("Threads started : %d\n", i); - signal(SIGINT, handle_sigint); signal(SIGHUP, handle_sigint); signal(SIGTERM, handle_sigint); diff --git a/blktrace.h b/blktrace.h index 388f219..521015e 100644 --- a/blktrace.h +++ b/blktrace.h @@ -7,7 +7,7 @@ #include "blktrace_api.h" #define CHECK_MAGIC(t) (((t)->magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) -#define SUPPORTED_VERSION (0x02) +#define SUPPORTED_VERSION (0x03) #if defined(__LITTLE_ENDIAN_BITFIELD) #define be16_to_cpu(x) __bswap_16(x) diff --git a/blktrace_api.h b/blktrace_api.h index 3861ab6..5b0ab31 100644 --- a/blktrace_api.h +++ b/blktrace_api.h @@ -51,7 +51,7 @@ enum { #define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE)) #define BLK_IO_TRACE_MAGIC 0x65617400 -#define BLK_IO_TRACE_VERSION 0x02 +#define BLK_IO_TRACE_VERSION 0x03 /* * The trace itself @@ -64,6 +64,7 @@ struct blk_io_trace { __u32 bytes; /* transfer length */ __u32 action; /* what happened */ __u32 pid; /* who did it */ + __u16 cpu; /* on what cpu did it happen */ __u16 error; /* completion error */ __u16 pdu_len; /* length of data after this trace */ }; diff --git a/kernel/blk-trace-2.6.13-mm1-C0 b/kernel/blk-trace-2.6.13-mm1-D0 similarity index 86% rename from kernel/blk-trace-2.6.13-mm1-C0 rename to kernel/blk-trace-2.6.13-mm1-D0 index ed5b442..4c0c03f 100644 --- a/kernel/blk-trace-2.6.13-mm1-C0 +++ b/kernel/blk-trace-2.6.13-mm1-D0 @@ -1,34 +1,7 @@ -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/Kconfig linux-2.6.13-mm1/drivers/block/Kconfig ---- /opt/kernel/linux-2.6.13-mm1/drivers/block/Kconfig 2005-09-02 13:46:54.000000000 +0200 -+++ linux-2.6.13-mm1/drivers/block/Kconfig 2005-09-02 14:03:01.000000000 +0200 -@@ -419,6 +419,14 @@ config LBD - your machine, or if you want to have a raid or loopback device - bigger than 2TB. Otherwise say N. - -+config BLK_DEV_IO_TRACE -+ bool "Support for tracing block io actions" -+ select RELAYFS_FS -+ help -+ Say Y here, if you want to be able to trace the block layer actions -+ on a given queue. -+ -+ - config CDROM_PKTCDVD - tristate "Packet writing on CD/DVD media" - depends on !UML -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/Makefile linux-2.6.13-mm1/drivers/block/Makefile ---- /opt/kernel/linux-2.6.13-mm1/drivers/block/Makefile 2005-09-02 13:46:54.000000000 +0200 -+++ linux-2.6.13-mm1/drivers/block/Makefile 2005-09-02 14:03:01.000000000 +0200 -@@ -45,3 +45,5 @@ obj-$(CONFIG_VIODASD) += viodasd.o - obj-$(CONFIG_BLK_DEV_SX8) += sx8.o - obj-$(CONFIG_BLK_DEV_UB) += ub.o - -+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o -+ -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/blktrace.c linux-2.6.13-mm1/drivers/block/blktrace.c +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/blktrace.c linux-2.6.13-mm1/drivers/block/blktrace.c --- /opt/kernel/linux-2.6.13-mm1/drivers/block/blktrace.c 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.13-mm1/drivers/block/blktrace.c 2005-09-02 16:03:25.000000000 +0200 -@@ -0,0 +1,223 @@ ++++ linux-2.6.13-mm1/drivers/block/blktrace.c 2005-09-05 12:50:53.000000000 +0200 +@@ -0,0 +1,224 @@ +#include +#include +#include @@ -74,6 +47,7 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m + t.bytes = bytes; + t.action = what; + t.pid = current->pid; ++ t.cpu = raw_smp_processor_id(); + t.error = error; + t.pdu_len = pdu_len; + @@ -252,9 +226,9 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m + +module_init(blk_trace_init); + -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/elevator.c linux-2.6.13-mm1/drivers/block/elevator.c ---- /opt/kernel/linux-2.6.13-mm1/drivers/block/elevator.c 2005-09-02 13:46:54.000000000 +0200 -+++ linux-2.6.13-mm1/drivers/block/elevator.c 2005-09-02 14:03:01.000000000 +0200 +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/elevator.c linux-2.6.13-mm1/drivers/block/elevator.c +--- /opt/kernel/linux-2.6.13-mm1/drivers/block/elevator.c 2005-09-01 13:07:30.000000000 +0200 ++++ linux-2.6.13-mm1/drivers/block/elevator.c 2005-09-05 09:33:45.000000000 +0200 @@ -34,6 +34,7 @@ #include #include @@ -273,9 +247,9 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m /* * just mark as started even if we don't start it, a request * that has been delayed should not be passed by new incoming -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/ioctl.c linux-2.6.13-mm1/drivers/block/ioctl.c ---- /opt/kernel/linux-2.6.13-mm1/drivers/block/ioctl.c 2005-09-02 13:46:54.000000000 +0200 -+++ linux-2.6.13-mm1/drivers/block/ioctl.c 2005-09-02 14:03:01.000000000 +0200 +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/ioctl.c linux-2.6.13-mm1/drivers/block/ioctl.c +--- /opt/kernel/linux-2.6.13-mm1/drivers/block/ioctl.c 2005-09-01 13:07:30.000000000 +0200 ++++ linux-2.6.13-mm1/drivers/block/ioctl.c 2005-09-05 09:33:45.000000000 +0200 @@ -4,6 +4,7 @@ #include #include @@ -295,9 +269,27 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m } return -ENOIOCTLCMD; } -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/drivers/block/ll_rw_blk.c linux-2.6.13-mm1/drivers/block/ll_rw_blk.c ---- /opt/kernel/linux-2.6.13-mm1/drivers/block/ll_rw_blk.c 2005-09-02 13:46:54.000000000 +0200 -+++ linux-2.6.13-mm1/drivers/block/ll_rw_blk.c 2005-09-02 15:14:43.000000000 +0200 +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/Kconfig linux-2.6.13-mm1/drivers/block/Kconfig +--- /opt/kernel/linux-2.6.13-mm1/drivers/block/Kconfig 2005-09-01 13:07:30.000000000 +0200 ++++ linux-2.6.13-mm1/drivers/block/Kconfig 2005-09-05 09:33:45.000000000 +0200 +@@ -419,6 +419,14 @@ config LBD + your machine, or if you want to have a raid or loopback device + bigger than 2TB. Otherwise say N. + ++config BLK_DEV_IO_TRACE ++ bool "Support for tracing block io actions" ++ select RELAYFS_FS ++ help ++ Say Y here, if you want to be able to trace the block layer actions ++ on a given queue. ++ ++ + config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media" + depends on !UML +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/ll_rw_blk.c linux-2.6.13-mm1/drivers/block/ll_rw_blk.c +--- /opt/kernel/linux-2.6.13-mm1/drivers/block/ll_rw_blk.c 2005-09-01 13:07:30.000000000 +0200 ++++ linux-2.6.13-mm1/drivers/block/ll_rw_blk.c 2005-09-05 09:33:45.000000000 +0200 @@ -29,6 +29,7 @@ #include #include @@ -392,9 +384,18 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m /* * extend uptodate bool to allow < 0 value to be direct io error */ -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/include/linux/blkdev.h linux-2.6.13-mm1/include/linux/blkdev.h ---- /opt/kernel/linux-2.6.13-mm1/include/linux/blkdev.h 2005-09-02 13:47:02.000000000 +0200 -+++ linux-2.6.13-mm1/include/linux/blkdev.h 2005-09-02 14:03:01.000000000 +0200 +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/drivers/block/Makefile linux-2.6.13-mm1/drivers/block/Makefile +--- /opt/kernel/linux-2.6.13-mm1/drivers/block/Makefile 2005-09-01 13:07:30.000000000 +0200 ++++ linux-2.6.13-mm1/drivers/block/Makefile 2005-09-05 09:33:45.000000000 +0200 +@@ -45,3 +45,5 @@ obj-$(CONFIG_VIODASD) += viodasd.o + obj-$(CONFIG_BLK_DEV_SX8) += sx8.o + obj-$(CONFIG_BLK_DEV_UB) += ub.o + ++obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o ++ +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/include/linux/blkdev.h linux-2.6.13-mm1/include/linux/blkdev.h +--- /opt/kernel/linux-2.6.13-mm1/include/linux/blkdev.h 2005-09-01 13:07:53.000000000 +0200 ++++ linux-2.6.13-mm1/include/linux/blkdev.h 2005-09-05 09:33:45.000000000 +0200 @@ -22,6 +22,7 @@ typedef struct request_queue request_que struct elevator_queue; typedef struct elevator_queue elevator_t; @@ -412,10 +413,10 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m }; enum { -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/include/linux/blktrace.h linux-2.6.13-mm1/include/linux/blktrace.h +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/include/linux/blktrace.h linux-2.6.13-mm1/include/linux/blktrace.h --- /opt/kernel/linux-2.6.13-mm1/include/linux/blktrace.h 1970-01-01 01:00:00.000000000 +0100 -+++ linux-2.6.13-mm1/include/linux/blktrace.h 2005-09-02 15:14:43.000000000 +0200 -@@ -0,0 +1,148 @@ ++++ linux-2.6.13-mm1/include/linux/blktrace.h 2005-09-05 12:54:43.000000000 +0200 +@@ -0,0 +1,149 @@ +#ifndef BLKTRACE_H +#define BLKTRACE_H + @@ -471,7 +472,7 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m +#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE)) + +#define BLK_IO_TRACE_MAGIC 0x65617400 -+#define BLK_IO_TRACE_VERSION 0x02 ++#define BLK_IO_TRACE_VERSION 0x03 + +/* + * The trace itself @@ -484,6 +485,7 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m + u32 bytes; /* transfer length */ + u32 action; /* what happened */ + u32 pid; /* who did it */ ++ u16 cpu; /* on what cpu did it happen */ + u16 error; /* completion error */ + u16 pdu_len; /* length of data after this trace */ +}; @@ -564,9 +566,9 @@ diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-m +#endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#endif -diff -urpN -X linux-2.6.13-mm1/Documentation/dontdiff /opt/kernel/linux-2.6.13-mm1/include/linux/fs.h linux-2.6.13-mm1/include/linux/fs.h ---- /opt/kernel/linux-2.6.13-mm1/include/linux/fs.h 2005-09-02 13:47:02.000000000 +0200 -+++ linux-2.6.13-mm1/include/linux/fs.h 2005-09-02 14:03:01.000000000 +0200 +diff -urpN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.6.13-mm1/include/linux/fs.h linux-2.6.13-mm1/include/linux/fs.h +--- /opt/kernel/linux-2.6.13-mm1/include/linux/fs.h 2005-09-01 13:07:53.000000000 +0200 ++++ linux-2.6.13-mm1/include/linux/fs.h 2005-09-05 09:33:45.000000000 +0200 @@ -196,6 +196,8 @@ extern int dir_notify_enable; #define BLKBSZGET _IOR(0x12,112,size_t) #define BLKBSZSET _IOW(0x12,113,size_t) -- 2.25.1