X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=blkparse.c;h=fcc0ccd0203e69f93ebaf52d55ce12448d8202f5;hb=a61c06774d03fb768fa9fb021e94599822848c8a;hp=90614d81cf9efbd504daae660daea1ce6d734cdf;hpb=4eb899a6de14660e2ce722dcc069836e17a6bc3e;p=blktrace.git diff --git a/blkparse.c b/blkparse.c index 90614d8..fcc0ccd 100644 --- a/blkparse.c +++ b/blkparse.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,7 @@ #include "rbtree.h" #include "jhash.h" -static char blkparse_version[] = "0.99.2"; +static char blkparse_version[] = "1.2.0"; struct skip_info { unsigned long start, end; @@ -100,11 +101,23 @@ struct per_process_info { #define PPI_HASH_SHIFT (8) #define PPI_HASH_SIZE (1 << PPI_HASH_SHIFT) #define PPI_HASH_MASK (PPI_HASH_SIZE - 1) + +enum { + SORT_PROG_EVENT_N, /* Program Name */ + SORT_PROG_EVENT_QKB, /* KB: Queued read and write */ + SORT_PROG_EVENT_RKB, /* KB: Queued Read */ + SORT_PROG_EVENT_WKB, /* KB: Queued Write */ + SORT_PROG_EVENT_CKB, /* KB: Complete */ + SORT_PROG_EVENT_QIO, /* IO: Queued read and write */ + SORT_PROG_EVENT_RIO, /* IO: Queued Read */ + SORT_PROG_EVENT_WIO, /* IO: Queued Write */ + SORT_PROG_EVENT_CIO, /* IO: Complete */ +}; + static struct per_process_info *ppi_hash_table[PPI_HASH_SIZE]; static struct per_process_info *ppi_list; static int ppi_list_entries; -#define S_OPTS "a:A:b:D:d:f:F:hi:o:Oqstw:vV" static struct option l_opts[] = { { .name = "act-mask", @@ -160,6 +173,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 'i' }, + { + .name = "no-msgs", + .has_arg = no_argument, + .flag = NULL, + .val = 'M' + }, { .name = "output", .has_arg = required_argument, @@ -184,6 +203,12 @@ static struct option l_opts[] = { .flag = NULL, .val = 's' }, + { + .name = "sort-program-stats", + .has_arg = required_argument, + .flag = NULL, + .val = 'S' + }, { .name = "track-ios", .has_arg = no_argument, @@ -264,12 +289,14 @@ static unsigned long long stopwatch_end = -1ULL; /* "infinity" */ static unsigned long read_sequence; static int per_process_stats; +static int per_process_stats_event = SORT_PROG_EVENT_N; static int per_device_and_cpu_stats = 1; static int track_ios; static int ppi_hash_by_pid = 1; static int verbose; static unsigned int act_mask = -1U; static int stats_printed; +static int bin_output_msgs = 1; int data_is_native = -1; static FILE *dump_fp; @@ -292,12 +319,29 @@ static volatile int done; struct timespec abs_start_time; static unsigned long long start_timestamp; +static int have_drv_data = 0; + #define JHASH_RANDOM (0x3af5f2ee) #define CPUS_PER_LONG (8 * sizeof(unsigned long)) #define CPU_IDX(cpu) ((cpu) / CPUS_PER_LONG) #define CPU_BIT(cpu) ((cpu) & (CPUS_PER_LONG - 1)) +static void io_warn_unless(struct blk_io_trace *t, int condition, + const char *fmt, ...) +{ + va_list ap; + + if (condition) + return; + va_start(ap, fmt); + printf("(%d,%d) request %llu + %u: ", + MAJOR(t->device), MINOR(t->device), + t->sector, t->bytes); + vfprintf(stderr, fmt, ap); + va_end(ap); +} + static void output_binary(void *buf, int len) { if (dump_binary) { @@ -544,7 +588,7 @@ static struct process_pid_map *find_ppm(pid_t pid) return NULL; } -static void add_ppm_hash(pid_t pid, const char *name) +static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name) { const int hash_idx = ppm_hash_pid(pid); struct process_pid_map *ppm; @@ -554,10 +598,14 @@ static void add_ppm_hash(pid_t pid, const char *name) ppm = malloc(sizeof(*ppm)); memset(ppm, 0, sizeof(*ppm)); ppm->pid = pid; - strcpy(ppm->comm, name); + memset(ppm->comm, 0, sizeof(ppm->comm)); + strncpy(ppm->comm, name, sizeof(ppm->comm)); + ppm->comm[sizeof(ppm->comm) - 1] = '\0'; ppm->hash_next = ppm_hash_table[hash_idx]; ppm_hash_table[hash_idx] = ppm; } + + return ppm; } static void handle_notify(struct blk_io_trace *bit) @@ -588,6 +636,22 @@ static void handle_notify(struct blk_io_trace *bit) break; + case BLK_TN_MESSAGE: + if (bit->pdu_len > 0) { + char msg[bit->pdu_len+1]; + + memcpy(msg, (char *)payload, bit->pdu_len); + msg[bit->pdu_len] = '\0'; + + fprintf(ofp, + "%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n", + MAJOR(bit->device), MINOR(bit->device), + bit->cpu, "0", (int) SECONDS(bit->time), + (unsigned long) NANO_SECONDS(bit->time), + 0, "m", "N", msg); + } + break; + default: /* Ignore unknown notify events */ ; @@ -937,7 +1001,13 @@ static struct io_track *find_track(struct per_dev_info *pdi, pid_t pid, if (!iot) { iot = malloc(sizeof(*iot)); iot->ppm = find_ppm(pid); + if (!iot->ppm) + iot->ppm = add_ppm_hash(pid, "unknown"); iot->sector = sector; + iot->allocation_time = -1ULL; + iot->queue_time = -1ULL; + iot->dispatch_time = -1ULL; + iot->completion_time = -1ULL; track_rb_insert(pdi, iot); } @@ -956,7 +1026,7 @@ static void log_track_frontmerge(struct per_dev_info *pdi, if (!iot) { if (verbose) fprintf(stderr, "merge not found for (%d,%d): %llu\n", - MAJOR(pdi->dev), MINOR(pdi->dev), + MAJOR(t->device), MINOR(t->device), (unsigned long long) t->sector + t_sec(t)); return; } @@ -974,16 +1044,11 @@ static void log_track_getrq(struct per_dev_info *pdi, struct blk_io_trace *t) return; iot = find_track(pdi, t->pid, t->sector); + io_warn_unless(t, iot->allocation_time == -1ULL, + "confused about %s time", "allocation"); iot->allocation_time = t->time; } -static inline int is_remapper(struct per_dev_info *pdi) -{ - int major = MAJOR(pdi->dev); - - return (major == 253 || major == 9); -} - /* * for md/dm setups, the interesting cycle is Q -> C. So track queueing * time here, as dispatch time @@ -994,10 +1059,10 @@ static void log_track_queue(struct per_dev_info *pdi, struct blk_io_trace *t) if (!track_ios) return; - if (!is_remapper(pdi)) - return; iot = find_track(pdi, t->pid, t->sector); + io_warn_unless(t, iot->dispatch_time == -1ULL, + "confused about %s time", "dispatch"); iot->dispatch_time = t->time; } @@ -1014,9 +1079,11 @@ static unsigned long long log_track_insert(struct per_dev_info *pdi, return -1; iot = find_track(pdi, t->pid, t->sector); + io_warn_unless(t, iot->queue_time == -1ULL, + "confused about %s time", "queue"); iot->queue_time = t->time; - if (!iot->allocation_time) + if (iot->allocation_time == -1ULL) return -1; elapsed = iot->queue_time - iot->allocation_time; @@ -1038,7 +1105,7 @@ static unsigned long long log_track_insert(struct per_dev_info *pdi, static unsigned long long log_track_issue(struct per_dev_info *pdi, struct blk_io_trace *t) { - unsigned long long elapsed; + unsigned long long elapsed = -1ULL; struct io_track *iot; if (!track_ios) @@ -1050,15 +1117,18 @@ static unsigned long long log_track_issue(struct per_dev_info *pdi, if (!iot) { if (verbose) fprintf(stderr, "issue not found for (%d,%d): %llu\n", - MAJOR(pdi->dev), MINOR(pdi->dev), + MAJOR(t->device), MINOR(t->device), (unsigned long long) t->sector); return -1; } + io_warn_unless(t, iot->dispatch_time == -1ULL, + "confused about %s time", "dispatch"); iot->dispatch_time = t->time; - elapsed = iot->dispatch_time - iot->queue_time; + if (iot->queue_time != -1ULL) + elapsed = iot->dispatch_time - iot->queue_time; - if (per_process_stats) { + if (elapsed != -1ULL && per_process_stats) { struct per_process_info *ppi = find_ppi(iot->ppm->pid); int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; @@ -1075,7 +1145,7 @@ static unsigned long long log_track_issue(struct per_dev_info *pdi, static unsigned long long log_track_complete(struct per_dev_info *pdi, struct blk_io_trace *t) { - unsigned long long elapsed; + unsigned long long elapsed = -1ULL; struct io_track *iot; if (!track_ios) @@ -1085,15 +1155,18 @@ static unsigned long long log_track_complete(struct per_dev_info *pdi, if (!iot) { if (verbose) fprintf(stderr,"complete not found for (%d,%d): %llu\n", - MAJOR(pdi->dev), MINOR(pdi->dev), + MAJOR(t->device), MINOR(t->device), (unsigned long long) t->sector); return -1; } + io_warn_unless(t, iot->completion_time == -1ULL, + "confused about %s time", "completion"); iot->completion_time = t->time; - elapsed = iot->completion_time - iot->dispatch_time; + if (iot->dispatch_time != -1ULL) + elapsed = iot->completion_time - iot->dispatch_time; - if (per_process_stats) { + if (elapsed != -1ULL && per_process_stats) { struct per_process_info *ppi = find_ppi(iot->ppm->pid); int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; @@ -1119,6 +1192,8 @@ static struct io_stats *find_process_io_stats(pid_t pid) ppi = malloc(sizeof(*ppi)); memset(ppi, 0, sizeof(*ppi)); ppi->ppm = find_ppm(pid); + if (!ppi->ppm) + ppi->ppm = add_ppm_hash(pid, "unknown"); add_ppi_to_hash(ppi); add_ppi_to_list(ppi); } @@ -1149,10 +1224,12 @@ static inline void __account_m(struct io_stats *ios, struct blk_io_trace *t, { if (rw) { ios->mwrites++; - ios->qwrite_kb += t_kb(t); + ios->mwrite_kb += t_kb(t); + ios->mwrite_b += t_b(t); } else { ios->mreads++; - ios->qread_kb += t_kb(t); + ios->mread_kb += t_kb(t); + ios->mread_b += t_b(t); } } @@ -1168,15 +1245,115 @@ static inline void account_m(struct blk_io_trace *t, struct per_cpu_info *pci, } } +static inline void __account_pc_queue(struct io_stats *ios, + struct blk_io_trace *t, int rw) +{ + if (rw) { + ios->qwrites_pc++; + ios->qwrite_kb_pc += t_kb(t); + ios->qwrite_b_pc += t_b(t); + } else { + ios->qreads_pc++; + ios->qread_kb += t_kb(t); + ios->qread_b_pc += t_b(t); + } +} + +static inline void account_pc_queue(struct blk_io_trace *t, + struct per_cpu_info *pci, int rw) +{ + __account_pc_queue(&pci->io_stats, t, rw); + + if (per_process_stats) { + struct io_stats *ios = find_process_io_stats(t->pid); + + __account_pc_queue(ios, t, rw); + } +} + +static inline void __account_pc_issue(struct io_stats *ios, int rw, + unsigned int bytes) +{ + if (rw) { + ios->iwrites_pc++; + ios->iwrite_kb_pc += bytes >> 10; + ios->iwrite_b_pc += bytes & 1023; + } else { + ios->ireads_pc++; + ios->iread_kb_pc += bytes >> 10; + ios->iread_b_pc += bytes & 1023; + } +} + +static inline void account_pc_issue(struct blk_io_trace *t, + struct per_cpu_info *pci, int rw) +{ + __account_pc_issue(&pci->io_stats, rw, t->bytes); + + if (per_process_stats) { + struct io_stats *ios = find_process_io_stats(t->pid); + + __account_pc_issue(ios, rw, t->bytes); + } +} + +static inline void __account_pc_requeue(struct io_stats *ios, + struct blk_io_trace *t, int rw) +{ + if (rw) { + ios->wrqueue_pc++; + ios->iwrite_kb_pc -= t_kb(t); + ios->iwrite_b_pc -= t_b(t); + } else { + ios->rrqueue_pc++; + ios->iread_kb_pc -= t_kb(t); + ios->iread_b_pc -= t_b(t); + } +} + +static inline void account_pc_requeue(struct blk_io_trace *t, + struct per_cpu_info *pci, int rw) +{ + __account_pc_requeue(&pci->io_stats, t, rw); + + if (per_process_stats) { + struct io_stats *ios = find_process_io_stats(t->pid); + + __account_pc_requeue(ios, t, rw); + } +} + +static inline void __account_pc_c(struct io_stats *ios, int rw) +{ + if (rw) + ios->cwrites_pc++; + else + ios->creads_pc++; +} + +static inline void account_pc_c(struct blk_io_trace *t, + struct per_cpu_info *pci, int rw) +{ + __account_pc_c(&pci->io_stats, rw); + + if (per_process_stats) { + struct io_stats *ios = find_process_io_stats(t->pid); + + __account_pc_c(ios, rw); + } +} + static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t, int rw) { if (rw) { ios->qwrites++; ios->qwrite_kb += t_kb(t); + ios->qwrite_b += t_b(t); } else { ios->qreads++; ios->qread_kb += t_kb(t); + ios->qread_b += t_b(t); } } @@ -1197,9 +1374,11 @@ static inline void __account_c(struct io_stats *ios, int rw, int bytes) if (rw) { ios->cwrites++; ios->cwrite_kb += bytes >> 10; + ios->cwrite_b += bytes & 1023; } else { ios->creads++; ios->cread_kb += bytes >> 10; + ios->cread_b += bytes & 1023; } } @@ -1221,9 +1400,11 @@ static inline void __account_issue(struct io_stats *ios, int rw, if (rw) { ios->iwrites++; ios->iwrite_kb += bytes >> 10; + ios->iwrite_b += bytes & 1023; } else { ios->ireads++; ios->iread_kb += bytes >> 10; + ios->iread_b += bytes & 1023; } } @@ -1265,9 +1446,11 @@ static inline void __account_requeue(struct io_stats *ios, if (rw) { ios->wrqueue++; ios->iwrite_kb -= t_kb(t); + ios->iwrite_b -= t_b(t); } else { ios->rrqueue++; ios->iread_kb -= t_kb(t); + ios->iread_b -= t_b(t); } } @@ -1347,13 +1530,16 @@ static void log_pc(struct per_cpu_info *pci, struct blk_io_trace *t, char *act) process_fmt(act, pci, t, -1ULL, t->pdu_len, buf); } -static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci) +static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi, + struct per_cpu_info *pci) { + int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; int act = t->action & 0xffff; switch (act) { case __BLK_TA_QUEUE: log_generic(pci, t, "Q"); + account_pc_queue(t, pci, w); break; case __BLK_TA_GETRQ: log_generic(pci, t, "G"); @@ -1362,13 +1548,27 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci) log_generic(pci, t, "S"); break; case __BLK_TA_REQUEUE: + /* + * can happen if we miss traces, don't let it go + * below zero + */ + if (pdi->cur_depth[w]) + pdi->cur_depth[w]--; + account_pc_requeue(t, pci, w); log_generic(pci, t, "R"); break; case __BLK_TA_ISSUE: + account_pc_issue(t, pci, w); + pdi->cur_depth[w]++; + if (pdi->cur_depth[w] > pdi->max_depth[w]) + pdi->max_depth[w] = pdi->cur_depth[w]; log_pc(pci, t, "D"); break; case __BLK_TA_COMPLETE: + if (pdi->cur_depth[w]) + pdi->cur_depth[w]--; log_pc(pci, t, "C"); + account_pc_c(t, pci, w); break; case __BLK_TA_INSERT: log_pc(pci, t, "I"); @@ -1452,6 +1652,10 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi, case __BLK_TA_REMAP: log_generic(pci, t, "A"); break; + case __BLK_TA_DRV_DATA: + have_drv_data = 1; + /* dump to binary file only */ + break; default: fprintf(stderr, "Bad fs action %x\n", t->action); break; @@ -1462,8 +1666,10 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci, struct per_dev_info *pdi) { if (text_output) { - if (t->action & BLK_TC_ACT(BLK_TC_PC)) - dump_trace_pc(t, pci); + if (t->action == BLK_TN_MESSAGE) + handle_notify(t); + else if (t->action & BLK_TC_ACT(BLK_TC_PC)) + dump_trace_pc(t, pdi, pci); else dump_trace_fs(t, pdi, pci); } @@ -1473,7 +1679,10 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci, pdi->events++; - output_binary(t, sizeof(*t) + t->pdu_len); + if (bin_output_msgs || + !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) && + t->action == BLK_TN_MESSAGE)) + output_binary(t, sizeof(*t) + t->pdu_len); } /* @@ -1504,21 +1713,60 @@ static void dump_io_stats(struct per_dev_info *pdi, struct io_stats *ios, fprintf(ofp, "%s\n", msg); - fprintf(ofp, " Reads Queued: %s, %siB\t", size_cnv(x, ios->qreads, 0), size_cnv(y, ios->qread_kb, 1)); - fprintf(ofp, " Writes Queued: %s, %siB\n", size_cnv(x, ios->qwrites, 0), size_cnv(y, ios->qwrite_kb, 1)); - - fprintf(ofp, " Read Dispatches: %s, %siB\t", size_cnv(x, ios->ireads, 0), size_cnv(y, ios->iread_kb, 1)); - fprintf(ofp, " Write Dispatches: %s, %siB\n", size_cnv(x, ios->iwrites, 0), size_cnv(y, ios->iwrite_kb, 1)); + fprintf(ofp, " Reads Queued: %s, %siB\t", + size_cnv(x, ios->qreads, 0), + size_cnv(y, ios->qread_kb + (ios->qread_b>>10), 1)); + fprintf(ofp, " Writes Queued: %s, %siB\n", + size_cnv(x, ios->qwrites, 0), + size_cnv(y, ios->qwrite_kb + (ios->qwrite_b>>10), 1)); + fprintf(ofp, " Read Dispatches: %s, %siB\t", + size_cnv(x, ios->ireads, 0), + size_cnv(y, ios->iread_kb + (ios->iread_b>>10), 1)); + fprintf(ofp, " Write Dispatches: %s, %siB\n", + size_cnv(x, ios->iwrites, 0), + size_cnv(y, ios->iwrite_kb + (ios->iwrite_b>>10), 1)); fprintf(ofp, " Reads Requeued: %s\t\t", size_cnv(x, ios->rrqueue, 0)); fprintf(ofp, " Writes Requeued: %s\n", size_cnv(x, ios->wrqueue, 0)); - fprintf(ofp, " Reads Completed: %s, %siB\t", size_cnv(x, ios->creads, 0), size_cnv(y, ios->cread_kb, 1)); - fprintf(ofp, " Writes Completed: %s, %siB\n", size_cnv(x, ios->cwrites, 0), size_cnv(y, ios->cwrite_kb, 1)); - fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' '); - fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites); + fprintf(ofp, " Reads Completed: %s, %siB\t", + size_cnv(x, ios->creads, 0), + size_cnv(y, ios->cread_kb + (ios->cread_b>>10), 1)); + fprintf(ofp, " Writes Completed: %s, %siB\n", + size_cnv(x, ios->cwrites, 0), + size_cnv(y, ios->cwrite_kb + (ios->cwrite_b>>10), 1)); + fprintf(ofp, " Read Merges: %s, %siB\t", + size_cnv(x, ios->mreads, 0), + size_cnv(y, ios->mread_kb + (ios->mread_b>>10), 1)); + fprintf(ofp, " Write Merges: %s, %siB\n", + size_cnv(x, ios->mwrites, 0), + size_cnv(y, ios->mwrite_kb + (ios->mwrite_b>>10), 1)); if (pdi) { fprintf(ofp, " Read depth: %'8u%8c\t", pdi->max_depth[0], ' '); fprintf(ofp, " Write depth: %'8u\n", pdi->max_depth[1]); } + if (ios->qreads_pc || ios->qwrites_pc || ios->ireads_pc || ios->iwrites_pc || + ios->rrqueue_pc || ios->wrqueue_pc || ios->creads_pc || ios->cwrites_pc) { + fprintf(ofp, " PC Reads Queued: %s, %siB\t", + size_cnv(x, ios->qreads_pc, 0), + size_cnv(y, + ios->qread_kb_pc + (ios->qread_b_pc>>10), 1)); + fprintf(ofp, " PC Writes Queued: %s, %siB\n", + size_cnv(x, ios->qwrites_pc, 0), + size_cnv(y, + ios->qwrite_kb_pc + (ios->qwrite_b_pc>>10), 1)); + fprintf(ofp, " PC Read Disp.: %s, %siB\t", + size_cnv(x, ios->ireads_pc, 0), + size_cnv(y, + ios->iread_kb_pc + (ios->iread_b_pc>>10), 1)); + fprintf(ofp, " PC Write Disp.: %s, %siB\n", + size_cnv(x, ios->iwrites_pc, 0), + size_cnv(y, + ios->iwrite_kb_pc + (ios->iwrite_b_pc>>10), + 1)); + fprintf(ofp, " PC Reads Req.: %s\t\t", size_cnv(x, ios->rrqueue_pc, 0)); + fprintf(ofp, " PC Writes Req.: %s\n", size_cnv(x, ios->wrqueue_pc, 0)); + fprintf(ofp, " PC Reads Compl.: %s\t\t", size_cnv(x, ios->creads_pc, 0)); + fprintf(ofp, " PC Writes Compl.: %s\n", size_cnv(x, ios->cwrites_pc, 0)); + } fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' '); fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs); } @@ -1553,6 +1801,88 @@ static int ppi_name_compare(const void *p1, const void *p2) return res; } +static int ppi_event_compare(const void *p1, const void *p2) +{ + struct per_process_info *ppi1 = *((struct per_process_info **) p1); + struct per_process_info *ppi2 = *((struct per_process_info **) p2); + struct io_stats *ios1 = &ppi1->io_stats; + struct io_stats *ios2 = &ppi2->io_stats; + unsigned long io1, io2; + unsigned long long kb1,kb2; + int sort_by_kb = 1; + + io1 = io2 = 0; + kb1 = kb2 = 0; + + switch (per_process_stats_event) { + case SORT_PROG_EVENT_QKB: /* KB: Queued read and write */ + kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10) + + ios1->qread_kb + (ios1->qread_b>>10); + kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10) + + ios2->qread_kb + (ios2->qread_b>>10); + break; + case SORT_PROG_EVENT_RKB: /* KB: Queued Read */ + kb1 = ios1->qread_kb + (ios1->qread_b>>10); + kb2 = ios2->qread_kb + (ios2->qread_b>>10); + break; + case SORT_PROG_EVENT_WKB: /* KB: Queued Write */ + kb1 = ios1->qwrite_kb + (ios1->qwrite_b>>10); + kb2 = ios2->qwrite_kb + (ios2->qwrite_b>>10); + break; + case SORT_PROG_EVENT_CKB: /* KB: Complete */ + kb1 = ios1->cwrite_kb + (ios1->cwrite_b>>10) + + ios1->cread_kb + (ios1->cread_b>>10); + kb2 = ios2->cwrite_kb + (ios2->cwrite_b>>10) + + ios2->cread_kb + (ios2->cread_b>>10); + break; + case SORT_PROG_EVENT_QIO: /* IO: Queued read and write */ + sort_by_kb = 0; + io1 = ios1->qreads + ios1->qwrites; + io2 = ios2->qreads + ios2->qwrites; + break; + case SORT_PROG_EVENT_RIO: /* IO: Queued Read */ + sort_by_kb = 0; + io1 = ios1->qreads; + io2 = ios2->qreads; + break; + case SORT_PROG_EVENT_WIO: /* IO: Queued Write */ + sort_by_kb = 0; + io1 = ios1->qwrites; + io2 = ios2->qwrites; + break; + case SORT_PROG_EVENT_CIO: /* IO: Complete */ + sort_by_kb = 0; + io1 = ios1->creads + ios1->cwrites; + io2 = ios2->creads + ios2->cwrites; + break; + } + + + /* compare kb */ + if (sort_by_kb) { + if (kb1 > kb2) + return 1; + else if (kb1 == kb2) + return 0; + return -1; + } + + /* compare io */ + if (io1 > io2) + return 1; + else if (io1 == io2) + return 0; + return -1; +} + +static int ppi_compare(const void *p1, const void *p2) +{ + if (per_process_stats_event == SORT_PROG_EVENT_N) + return ppi_name_compare(p1, p2); + + return ppi_event_compare(p1, p2); +} + static void sort_process_list(void) { struct per_process_info **ppis; @@ -1567,7 +1897,7 @@ static void sort_process_list(void) ppi = ppi->list_next; } - qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_name_compare); + qsort(ppis, ppi_list_entries, sizeof(ppi), ppi_compare); i = ppi_list_entries - 1; ppi_list = NULL; @@ -1646,6 +1976,34 @@ static void show_device_and_cpu_stats(void) total.cwrite_kb += ios->cwrite_kb; total.iread_kb += ios->iread_kb; total.iwrite_kb += ios->iwrite_kb; + total.mread_kb += ios->mread_kb; + total.mwrite_kb += ios->mwrite_kb; + total.qread_b += ios->qread_b; + total.qwrite_b += ios->qwrite_b; + total.cread_b += ios->cread_b; + total.cwrite_b += ios->cwrite_b; + total.iread_b += ios->iread_b; + total.iwrite_b += ios->iwrite_b; + total.mread_b += ios->mread_b; + total.mwrite_b += ios->mwrite_b; + + total.qreads_pc += ios->qreads_pc; + total.qwrites_pc += ios->qwrites_pc; + total.creads_pc += ios->creads_pc; + total.cwrites_pc += ios->cwrites_pc; + total.ireads_pc += ios->ireads_pc; + total.iwrites_pc += ios->iwrites_pc; + total.rrqueue_pc += ios->rrqueue_pc; + total.wrqueue_pc += ios->wrqueue_pc; + total.qread_kb_pc += ios->qread_kb_pc; + total.qwrite_kb_pc += ios->qwrite_kb_pc; + total.iread_kb_pc += ios->iread_kb_pc; + total.iwrite_kb_pc += ios->iwrite_kb_pc; + total.qread_b_pc += ios->qread_b_pc; + total.qwrite_b_pc += ios->qwrite_b_pc; + total.iread_b_pc += ios->iread_b_pc; + total.iwrite_b_pc += ios->iwrite_b_pc; + total.timer_unplugs += ios->timer_unplugs; total.io_unplugs += ios->io_unplugs; @@ -1665,8 +2023,10 @@ static void show_device_and_cpu_stats(void) wrate = rrate = 0; msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000; if (msec) { - rrate = 1000 * total.cread_kb / msec; - wrate = 1000 * total.cwrite_kb / msec; + rrate = ((1000 * total.cread_kb) + total.cread_b) / + msec; + wrate = ((1000 * total.cwrite_kb) + total.cwrite_b) / + msec; } fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n", @@ -1685,6 +2045,22 @@ static void show_device_and_cpu_stats(void) } } +static void correct_abs_start_time(void) +{ + long delta = genesis_time - start_timestamp; + + abs_start_time.tv_sec += SECONDS(delta); + abs_start_time.tv_nsec += NANO_SECONDS(delta); + if (abs_start_time.tv_nsec < 0) { + abs_start_time.tv_nsec += 1000000000; + abs_start_time.tv_sec -= 1; + } else + if (abs_start_time.tv_nsec > 1000000000) { + abs_start_time.tv_nsec -= 1000000000; + abs_start_time.tv_sec += 1; + } +} + static void find_genesis(void) { struct trace *t = trace_list; @@ -1702,18 +2078,7 @@ static void find_genesis(void) */ if (start_timestamp && start_timestamp != genesis_time) { - long delta = genesis_time - start_timestamp; - - abs_start_time.tv_sec += SECONDS(delta); - abs_start_time.tv_nsec += NANO_SECONDS(delta); - if (abs_start_time.tv_nsec < 0) { - abs_start_time.tv_nsec += 1000000000; - abs_start_time.tv_sec -= 1; - } else - if (abs_start_time.tv_nsec > 1000000000) { - abs_start_time.tv_nsec -= 1000000000; - abs_start_time.tv_sec += 1; - } + correct_abs_start_time(); } } @@ -1788,6 +2153,7 @@ static int check_cpu_map(struct per_dev_info *pdi) * create a map of the cpus we have traces for */ cpu_map = malloc(pdi->cpu_map_max / sizeof(long)); + memset(cpu_map, 0, sizeof(*cpu_map)); n = rb_first(&rb_sort_root); while (n) { __t = rb_entry(n, struct trace, rb_node); @@ -1891,7 +2257,8 @@ static void show_entries_rb(int force) break; } - if (check_sequence(pdi, t, force)) + if (!(bit->action == BLK_TN_MESSAGE) && + check_sequence(pdi, t, force)) break; if (!force && bit->time > last_allowed_time) @@ -1902,7 +2269,8 @@ static void show_entries_rb(int force) if (!pci || pci->cpu != bit->cpu) pci = get_cpu_info(pdi, bit->cpu); - pci->last_sequence = bit->sequence; + if (!(bit->action == BLK_TN_MESSAGE)) + pci->last_sequence = bit->sequence; pci->nelems++; @@ -2035,7 +2403,7 @@ static int read_events(int fd, int always_block, int *fdblock) /* * not a real trace, so grab and handle it here */ - if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) { + if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) { handle_notify(bit); output_binary(bit, sizeof(*bit) + bit->pdu_len); continue; @@ -2178,7 +2546,13 @@ static int ms_prime(struct ms_stream *msp) if (verify_trace(bit)) goto err; - if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) { + if (bit->cpu != pci->cpu) { + fprintf(stderr, "cpu %d trace info has error cpu %d\n", + pci->cpu, bit->cpu); + continue; + } + + if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) { handle_notify(bit); output_binary(bit, sizeof(*bit) + bit->pdu_len); bit_free(bit); @@ -2283,17 +2657,19 @@ static int handle(struct ms_stream *msp) struct blk_io_trace *bit; t = ms_peek(msp); - if (t->bit->time > stopwatch_end) - return 0; bit = t->bit; pdi = msp->pdi; pci = get_cpu_info(pdi, msp->cpu); pci->nelems++; - bit->time -= genesis_time; + + if (t->bit->time > stopwatch_end) + return 0; + pdi->last_reported_time = bit->time; - if (bit->action & (act_mask << BLK_TC_SHIFT)) + if ((bit->action & (act_mask << BLK_TC_SHIFT))&& + t->bit->time >= stopwatch_start) dump_trace(bit, pci, pdi); ms_deq(msp); @@ -2308,9 +2684,27 @@ static int handle(struct ms_stream *msp) return 1; } +/* + * Check if we need to sanitize the name. We allow 'foo', or if foo.blktrace.X + * is given, then strip back down to 'foo' to avoid missing files. + */ +static int name_fixup(char *name) +{ + char *b; + + if (!name) + return 1; + + b = strstr(name, ".blktrace."); + if (b) + *b = '\0'; + + return 0; +} + static int do_file(void) { - int i, cpu; + int i, cpu, ret; struct per_dev_info *pdi; /* @@ -2318,8 +2712,18 @@ static int do_file(void) */ for (i = 0; i < ndevices; i++) { pdi = &devices[i]; + ret = name_fixup(pdi->name); + if (ret) + return ret; + for (cpu = 0; setup_file(pdi, cpu); cpu++) ; + + if (!cpu) { + fprintf(stderr,"No input files found for %s\n", + pdi->name); + return 1; + } } /* @@ -2328,6 +2732,14 @@ static int do_file(void) if (ms_head) genesis_time = ms_peek_time(ms_head); + /* + * Correct abs_start_time if necessary + */ + if (start_timestamp + && start_timestamp != genesis_time) { + correct_abs_start_time(); + } + /* * Keep processing traces while any are left */ @@ -2456,7 +2868,46 @@ static int is_pipe(const char *str) return 0; } -#define S_OPTS "a:A:b:D:d:f:F:hi:o:Oqstw:vV" +static int get_program_sort_event(const char *str) +{ + char evt = str[0]; + + switch (evt) { + case 'N': + per_process_stats_event = SORT_PROG_EVENT_N; + break; + case 'Q': + per_process_stats_event = SORT_PROG_EVENT_QKB; + break; + case 'q': + per_process_stats_event = SORT_PROG_EVENT_QIO; + break; + case 'R': + per_process_stats_event = SORT_PROG_EVENT_RKB; + break; + case 'r': + per_process_stats_event = SORT_PROG_EVENT_RIO; + break; + case 'W': + per_process_stats_event = SORT_PROG_EVENT_WKB; + break; + case 'w': + per_process_stats_event = SORT_PROG_EVENT_WIO; + break; + case 'C': + per_process_stats_event = SORT_PROG_EVENT_CKB; + break; + case 'c': + per_process_stats_event = SORT_PROG_EVENT_CIO; + break; + default: + return 1; + } + + return 0; +} + +#define S_OPTS "a:A:b:D:d:f:F:hi:o:OqsS:tw:vVM" static char usage_str[] = "\n\n" \ "-i | --input=\n" \ "[ -a | --act-mask= ]\n" \ @@ -2471,10 +2922,14 @@ static char usage_str[] = "\n\n" \ "[ -O | --no-text-output ]\n" \ "[ -q | --quiet ]\n" \ "[ -s | --per-program-stats ]\n" \ + "[ -S | --sort-program-stats= ]\n" \ "[ -t | --track-ios ]\n" \ "[ -w