#include "rbtree.h"
#include "jhash.h"
-static char blkparse_version[] = "0.99.1";
+static char blkparse_version[] = "1.2.0";
struct skip_info {
unsigned long start, end;
static struct per_process_info *ppi_list;
static int ppi_list_entries;
-#define S_OPTS "a:A:b:D:d:f:F:hi:o:Oqstw:vV"
static struct option l_opts[] = {
{
.name = "act-mask",
.flag = NULL,
.val = 'i'
},
+ {
+ .name = "no-msgs",
+ .has_arg = no_argument,
+ .flag = NULL,
+ .val = 'M'
+ },
{
.name = "output",
.has_arg = required_argument,
static int verbose;
static unsigned int act_mask = -1U;
static int stats_printed;
+static int bin_output_msgs = 1;
int data_is_native = -1;
static FILE *dump_fp;
static unsigned int rb_batch = RB_BATCH_DEFAULT;
static int pipeline;
+static char *pipename;
static int text_output = 1;
#define is_done() (*(volatile int *)(&done))
static volatile int done;
+struct timespec abs_start_time;
+static unsigned long long start_timestamp;
+
+static int have_drv_data = 0;
+
#define JHASH_RANDOM (0x3af5f2ee)
#define CPUS_PER_LONG (8 * sizeof(unsigned long))
return NULL;
}
-static void add_ppm_hash(pid_t pid, const char *name)
+static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name)
{
const int hash_idx = ppm_hash_pid(pid);
struct process_pid_map *ppm;
ppm = malloc(sizeof(*ppm));
memset(ppm, 0, sizeof(*ppm));
ppm->pid = pid;
- strcpy(ppm->comm, name);
+ memset(ppm->comm, 0, sizeof(ppm->comm));
+ strncpy(ppm->comm, name, sizeof(ppm->comm));
+ ppm->comm[sizeof(ppm->comm) - 1] = '\0';
ppm->hash_next = ppm_hash_table[hash_idx];
ppm_hash_table[hash_idx] = ppm;
}
+
+ return ppm;
+}
+
+/*
+ * Process one notify record.  The payload starts right after the fixed
+ * blk_io_trace header.
+ *
+ *  BLK_TN_PROCESS:   register the pid/name pair via add_ppm_hash()
+ *  BLK_TN_TIMESTAMP: remember the trace time and the two 32-bit values
+ *                    (seconds, nanoseconds) as the absolute start time
+ *  BLK_TN_MESSAGE:   print the message text in the regular trace format
+ *
+ * Every other action is silently ignored.
+ */
+static void handle_notify(struct blk_io_trace *bit)
+{
+	void *pdu = (caddr_t) bit + sizeof(*bit);
+
+	if (bit->action == BLK_TN_PROCESS) {
+		add_ppm_hash(bit->pid, pdu);
+		return;
+	}
+
+	if (bit->action == BLK_TN_TIMESTAMP) {
+		__u32 stamp[2];
+
+		/* a payload of any other size is not a timestamp we know */
+		if (bit->pdu_len != sizeof(stamp))
+			return;
+
+		memcpy(stamp, pdu, sizeof(stamp));
+		if (!data_is_native) {
+			stamp[0] = be32_to_cpu(stamp[0]);
+			stamp[1] = be32_to_cpu(stamp[1]);
+		}
+
+		start_timestamp = bit->time;
+		abs_start_time.tv_sec = stamp[0];
+		abs_start_time.tv_nsec = stamp[1];
+		if (abs_start_time.tv_nsec < 0) {
+			abs_start_time.tv_sec--;
+			abs_start_time.tv_nsec += 1000000000;
+		}
+		return;
+	}
+
+	if (bit->action == BLK_TN_MESSAGE && bit->pdu_len > 0) {
+		/* local, NUL-terminated copy of the message text */
+		char msg[bit->pdu_len+1];
+
+		memcpy(msg, (char *)pdu, bit->pdu_len);
+		msg[bit->pdu_len] = '\0';
+
+		fprintf(ofp,
+			"%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+			MAJOR(bit->device), MINOR(bit->device),
+			bit->cpu, "0", (int) SECONDS(bit->time),
+			(unsigned long) NANO_SECONDS(bit->time),
+			0, "m", "N", msg);
+	}
}
char *find_process_name(pid_t pid)
if (!iot) {
iot = malloc(sizeof(*iot));
iot->ppm = find_ppm(pid);
+ if (!iot->ppm)
+ iot->ppm = add_ppm_hash(pid, "unknown");
iot->sector = sector;
track_rb_insert(pdi, iot);
}
ppi = malloc(sizeof(*ppi));
memset(ppi, 0, sizeof(*ppi));
ppi->ppm = find_ppm(pid);
+ if (!ppi->ppm)
+ ppi->ppm = add_ppm_hash(pid, "unknown");
add_ppi_to_hash(ppi);
add_ppi_to_list(ppi);
}
{
if (rw) {
ios->mwrites++;
- ios->qwrite_kb += t_kb(t);
+ ios->mwrite_kb += t_kb(t);
+ ios->mwrite_b += t_b(t);
} else {
ios->mreads++;
- ios->qread_kb += t_kb(t);
+ ios->mread_kb += t_kb(t);
+ ios->mread_b += t_b(t);
}
}
}
}
+/*
+ * Account one queued PC request in @ios.
+ *
+ * @rw selects the direction: non-zero updates the write counters, zero the
+ * read counters.  The request size from t_kb()/t_b() is added to the
+ * matching *_kb_pc and *_b_pc accumulators; the regular (non-PC) queue
+ * counters are left alone.
+ */
+static inline void __account_pc_queue(struct io_stats *ios,
+				      struct blk_io_trace *t, int rw)
+{
+	if (rw) {
+		ios->qwrites_pc++;
+		ios->qwrite_kb_pc += t_kb(t);
+		ios->qwrite_b_pc += t_b(t);
+	} else {
+		ios->qreads_pc++;
+		/* must be the _pc field, not the regular qread_kb total */
+		ios->qread_kb_pc += t_kb(t);
+		ios->qread_b_pc += t_b(t);
+	}
+}
+
+/*
+ * Record a queued PC request against the per-CPU statistics and, when
+ * per-process statistics are enabled, against the issuing pid as well.
+ */
+static inline void account_pc_queue(struct blk_io_trace *t,
+				    struct per_cpu_info *pci, int rw)
+{
+	__account_pc_queue(&pci->io_stats, t, rw);
+
+	if (!per_process_stats)
+		return;
+
+	__account_pc_queue(find_process_io_stats(t->pid), t, rw);
+}
+
+/*
+ * Account one issued PC request of @bytes bytes in @ios.  The size is
+ * split into whole KiB (bytes >> 10) and the remaining bytes
+ * (bytes & 1023); @rw non-zero selects the write counters.
+ */
+static inline void __account_pc_issue(struct io_stats *ios, int rw,
+				      unsigned int bytes)
+{
+	const unsigned int whole_kb = bytes >> 10;
+	const unsigned int leftover = bytes & 1023;
+
+	if (!rw) {
+		ios->ireads_pc++;
+		ios->iread_kb_pc += whole_kb;
+		ios->iread_b_pc += leftover;
+		return;
+	}
+
+	ios->iwrites_pc++;
+	ios->iwrite_kb_pc += whole_kb;
+	ios->iwrite_b_pc += leftover;
+}
+
+/*
+ * Record an issued PC request against the per-CPU statistics and, when
+ * per-process statistics are enabled, against the issuing pid as well.
+ */
+static inline void account_pc_issue(struct blk_io_trace *t,
+				    struct per_cpu_info *pci, int rw)
+{
+	__account_pc_issue(&pci->io_stats, rw, t->bytes);
+
+	if (!per_process_stats)
+		return;
+
+	__account_pc_issue(find_process_io_stats(t->pid), rw, t->bytes);
+}
+
+/*
+ * Account one requeued PC request in @ios: bump the requeue counter for the
+ * direction given by @rw and take the request size back out of the
+ * issued KiB/byte accumulators.
+ */
+static inline void __account_pc_requeue(struct io_stats *ios,
+					struct blk_io_trace *t, int rw)
+{
+	if (!rw) {
+		ios->rrqueue_pc++;
+		ios->iread_kb_pc -= t_kb(t);
+		ios->iread_b_pc -= t_b(t);
+	} else {
+		ios->wrqueue_pc++;
+		ios->iwrite_kb_pc -= t_kb(t);
+		ios->iwrite_b_pc -= t_b(t);
+	}
+}
+
+/*
+ * Record a requeued PC request against the per-CPU statistics and, when
+ * per-process statistics are enabled, against the issuing pid as well.
+ */
+static inline void account_pc_requeue(struct blk_io_trace *t,
+				      struct per_cpu_info *pci, int rw)
+{
+	__account_pc_requeue(&pci->io_stats, t, rw);
+
+	if (!per_process_stats)
+		return;
+
+	__account_pc_requeue(find_process_io_stats(t->pid), t, rw);
+}
+
+/*
+ * Count one completed PC request in @ios; @rw non-zero means write.
+ */
+static inline void __account_pc_c(struct io_stats *ios, int rw)
+{
+	if (!rw) {
+		ios->creads_pc++;
+		return;
+	}
+
+	ios->cwrites_pc++;
+}
+
+/*
+ * Record a completed PC request against the per-CPU statistics and, when
+ * per-process statistics are enabled, against the issuing pid as well.
+ */
+static inline void account_pc_c(struct blk_io_trace *t,
+				struct per_cpu_info *pci, int rw)
+{
+	__account_pc_c(&pci->io_stats, rw);
+
+	if (!per_process_stats)
+		return;
+
+	__account_pc_c(find_process_io_stats(t->pid), rw);
+}
+
+/*
+ * Account one queued request in @ios: bump the read or write queue counter
+ * (@rw non-zero selects write) and add the request size reported by
+ * t_kb()/t_b() to the matching KiB and byte accumulators.
+ */
static inline void __account_queue(struct io_stats *ios, struct blk_io_trace *t,
int rw)
{
if (rw) {
ios->qwrites++;
ios->qwrite_kb += t_kb(t);
+ ios->qwrite_b += t_b(t); /* byte part; presumably the sub-KiB remainder - confirm against t_b() */
} else {
ios->qreads++;
ios->qread_kb += t_kb(t);
+ ios->qread_b += t_b(t); /* read-side counterpart of qwrite_b */
}
}
if (rw) {
ios->cwrites++;
ios->cwrite_kb += bytes >> 10;
+ ios->cwrite_b += bytes & 1023;
} else {
ios->creads++;
ios->cread_kb += bytes >> 10;
+ ios->cread_b += bytes & 1023;
}
}
if (rw) {
ios->iwrites++;
ios->iwrite_kb += bytes >> 10;
+ ios->iwrite_b += bytes & 1023;
} else {
ios->ireads++;
ios->iread_kb += bytes >> 10;
+ ios->iread_b += bytes & 1023;
}
}
if (rw) {
ios->wrqueue++;
ios->iwrite_kb -= t_kb(t);
+ ios->iwrite_b -= t_b(t);
} else {
ios->rrqueue++;
ios->iread_kb -= t_kb(t);
+ ios->iread_b -= t_b(t);
}
}
process_fmt(act, pci, t, -1ULL, t->pdu_len, buf);
}
-static void dump_trace_pc(struct blk_io_trace *t, struct per_cpu_info *pci)
+static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
+ struct per_cpu_info *pci)
{
+ int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
int act = t->action & 0xffff;
switch (act) {
case __BLK_TA_QUEUE:
log_generic(pci, t, "Q");
+ account_pc_queue(t, pci, w);
break;
case __BLK_TA_GETRQ:
log_generic(pci, t, "G");
log_generic(pci, t, "S");
break;
case __BLK_TA_REQUEUE:
+ /*
+ * can happen if we miss traces, don't let it go
+ * below zero
+ */
+ if (pdi->cur_depth[w])
+ pdi->cur_depth[w]--;
+ account_pc_requeue(t, pci, w);
log_generic(pci, t, "R");
break;
case __BLK_TA_ISSUE:
+ account_pc_issue(t, pci, w);
+ pdi->cur_depth[w]++;
+ if (pdi->cur_depth[w] > pdi->max_depth[w])
+ pdi->max_depth[w] = pdi->cur_depth[w];
log_pc(pci, t, "D");
break;
case __BLK_TA_COMPLETE:
+ if (pdi->cur_depth[w])
+ pdi->cur_depth[w]--;
log_pc(pci, t, "C");
+ account_pc_c(t, pci, w);
break;
case __BLK_TA_INSERT:
log_pc(pci, t, "I");
case __BLK_TA_REMAP:
log_generic(pci, t, "A");
break;
+ case __BLK_TA_DRV_DATA:
+ have_drv_data = 1;
+ /* dump to binary file only */
+ break;
default:
fprintf(stderr, "Bad fs action %x\n", t->action);
break;
struct per_dev_info *pdi)
{
if (text_output) {
- if (t->action & BLK_TC_ACT(BLK_TC_PC))
- dump_trace_pc(t, pci);
+ if (t->action == BLK_TN_MESSAGE)
+ handle_notify(t);
+ else if (t->action & BLK_TC_ACT(BLK_TC_PC))
+ dump_trace_pc(t, pdi, pci);
else
dump_trace_fs(t, pdi, pci);
}
pdi->events++;
- output_binary(t, sizeof(*t) + t->pdu_len);
+ if (bin_output_msgs ||
+ !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
+ t->action == BLK_TN_MESSAGE))
+ output_binary(t, sizeof(*t) + t->pdu_len);
}
/*
fprintf(ofp, "%s\n", msg);
- fprintf(ofp, " Reads Queued: %s, %siB\t", size_cnv(x, ios->qreads, 0), size_cnv(y, ios->qread_kb, 1));
- fprintf(ofp, " Writes Queued: %s, %siB\n", size_cnv(x, ios->qwrites, 0), size_cnv(y, ios->qwrite_kb, 1));
-
- fprintf(ofp, " Read Dispatches: %s, %siB\t", size_cnv(x, ios->ireads, 0), size_cnv(y, ios->iread_kb, 1));
- fprintf(ofp, " Write Dispatches: %s, %siB\n", size_cnv(x, ios->iwrites, 0), size_cnv(y, ios->iwrite_kb, 1));
+ fprintf(ofp, " Reads Queued: %s, %siB\t",
+ size_cnv(x, ios->qreads, 0),
+ size_cnv(y, ios->qread_kb + (ios->qread_b>>10), 1));
+ fprintf(ofp, " Writes Queued: %s, %siB\n",
+ size_cnv(x, ios->qwrites, 0),
+ size_cnv(y, ios->qwrite_kb + (ios->qwrite_b>>10), 1));
+ fprintf(ofp, " Read Dispatches: %s, %siB\t",
+ size_cnv(x, ios->ireads, 0),
+ size_cnv(y, ios->iread_kb + (ios->iread_b>>10), 1));
+ fprintf(ofp, " Write Dispatches: %s, %siB\n",
+ size_cnv(x, ios->iwrites, 0),
+ size_cnv(y, ios->iwrite_kb + (ios->iwrite_b>>10), 1));
fprintf(ofp, " Reads Requeued: %s\t\t", size_cnv(x, ios->rrqueue, 0));
fprintf(ofp, " Writes Requeued: %s\n", size_cnv(x, ios->wrqueue, 0));
- fprintf(ofp, " Reads Completed: %s, %siB\t", size_cnv(x, ios->creads, 0), size_cnv(y, ios->cread_kb, 1));
- fprintf(ofp, " Writes Completed: %s, %siB\n", size_cnv(x, ios->cwrites, 0), size_cnv(y, ios->cwrite_kb, 1));
- fprintf(ofp, " Read Merges: %'8lu%8c\t", ios->mreads, ' ');
- fprintf(ofp, " Write Merges: %'8lu\n", ios->mwrites);
+ fprintf(ofp, " Reads Completed: %s, %siB\t",
+ size_cnv(x, ios->creads, 0),
+ size_cnv(y, ios->cread_kb + (ios->cread_b>>10), 1));
+ fprintf(ofp, " Writes Completed: %s, %siB\n",
+ size_cnv(x, ios->cwrites, 0),
+ size_cnv(y, ios->cwrite_kb + (ios->cwrite_b>>10), 1));
+ fprintf(ofp, " Read Merges: %s, %siB\t",
+ size_cnv(x, ios->mreads, 0),
+ size_cnv(y, ios->mread_kb + (ios->mread_b>>10), 1));
+ fprintf(ofp, " Write Merges: %s, %siB\n",
+ size_cnv(x, ios->mwrites, 0),
+ size_cnv(y, ios->mwrite_kb + (ios->mwrite_b>>10), 1));
if (pdi) {
fprintf(ofp, " Read depth: %'8u%8c\t", pdi->max_depth[0], ' ');
fprintf(ofp, " Write depth: %'8u\n", pdi->max_depth[1]);
}
+ if (ios->qreads_pc || ios->qwrites_pc || ios->ireads_pc || ios->iwrites_pc ||
+ ios->rrqueue_pc || ios->wrqueue_pc || ios->creads_pc || ios->cwrites_pc) {
+ fprintf(ofp, " PC Reads Queued: %s, %siB\t",
+ size_cnv(x, ios->qreads_pc, 0),
+ size_cnv(y,
+ ios->qread_kb_pc + (ios->qread_b_pc>>10), 1));
+ fprintf(ofp, " PC Writes Queued: %s, %siB\n",
+ size_cnv(x, ios->qwrites_pc, 0),
+ size_cnv(y,
+ ios->qwrite_kb_pc + (ios->qwrite_b_pc>>10), 1));
+ fprintf(ofp, " PC Read Disp.: %s, %siB\t",
+ size_cnv(x, ios->ireads_pc, 0),
+ size_cnv(y,
+ ios->iread_kb_pc + (ios->iread_b_pc>>10), 1));
+ fprintf(ofp, " PC Write Disp.: %s, %siB\n",
+ size_cnv(x, ios->iwrites_pc, 0),
+ size_cnv(y,
+ ios->iwrite_kb_pc + (ios->iwrite_b_pc>>10),
+ 1));
+ fprintf(ofp, " PC Reads Req.: %s\t\t", size_cnv(x, ios->rrqueue_pc, 0));
+ fprintf(ofp, " PC Writes Req.: %s\n", size_cnv(x, ios->wrqueue_pc, 0));
+ fprintf(ofp, " PC Reads Compl.: %s\t\t", size_cnv(x, ios->creads_pc, 0));
+ fprintf(ofp, " PC Writes Compl.: %s\n", size_cnv(x, ios->cwrites_pc, 0));
+ }
fprintf(ofp, " IO unplugs: %'8lu%8c\t", ios->io_unplugs, ' ');
fprintf(ofp, " Timer unplugs: %'8lu\n", ios->timer_unplugs);
}
int i, j, pci_events;
char line[3 + 8/*cpu*/ + 2 + 32/*dev*/ + 3];
char name[32];
+ double ratio;
for (pdi = devices, i = 0; i < ndevices; i++, pdi++) {
total.cwrite_kb += ios->cwrite_kb;
total.iread_kb += ios->iread_kb;
total.iwrite_kb += ios->iwrite_kb;
+ total.mread_kb += ios->mread_kb;
+ total.mwrite_kb += ios->mwrite_kb;
+ total.qread_b += ios->qread_b;
+ total.qwrite_b += ios->qwrite_b;
+ total.cread_b += ios->cread_b;
+ total.cwrite_b += ios->cwrite_b;
+ total.iread_b += ios->iread_b;
+ total.iwrite_b += ios->iwrite_b;
+ total.mread_b += ios->mread_b;
+ total.mwrite_b += ios->mwrite_b;
+
+ total.qreads_pc += ios->qreads_pc;
+ total.qwrites_pc += ios->qwrites_pc;
+ total.creads_pc += ios->creads_pc;
+ total.cwrites_pc += ios->cwrites_pc;
+ total.ireads_pc += ios->ireads_pc;
+ total.iwrites_pc += ios->iwrites_pc;
+ total.rrqueue_pc += ios->rrqueue_pc;
+ total.wrqueue_pc += ios->wrqueue_pc;
+ total.qread_kb_pc += ios->qread_kb_pc;
+ total.qwrite_kb_pc += ios->qwrite_kb_pc;
+ total.iread_kb_pc += ios->iread_kb_pc;
+ total.iwrite_kb_pc += ios->iwrite_kb_pc;
+ total.qread_b_pc += ios->qread_b_pc;
+ total.qwrite_b_pc += ios->qwrite_b_pc;
+ total.iread_b_pc += ios->iread_b_pc;
+ total.iwrite_b_pc += ios->iwrite_b_pc;
+
total.timer_unplugs += ios->timer_unplugs;
total.io_unplugs += ios->io_unplugs;
wrate = rrate = 0;
msec = (pdi->last_reported_time - pdi->first_reported_time) / 1000000;
if (msec) {
- rrate = 1000 * total.cread_kb / msec;
- wrate = 1000 * total.cwrite_kb / msec;
+ rrate = ((1000 * total.cread_kb) + total.cread_b) /
+ msec;
+ wrate = ((1000 * total.cwrite_kb) + total.cwrite_b) /
+ msec;
}
fprintf(ofp, "\nThroughput (R/W): %'LuKiB/s / %'LuKiB/s\n",
get_dev_name(pdi, line, sizeof(line)), pdi->events);
collect_pdi_skips(pdi);
+ if (!pdi->skips && !pdi->events)
+ ratio = 0.0;
+ else
+ ratio = 100.0 * ((double)pdi->seq_skips /
+ (double)(pdi->events + pdi->seq_skips));
fprintf(ofp, "Skips: %'lu forward (%'llu - %5.1lf%%)\n",
- pdi->skips,pdi->seq_skips,
- 100.0 * ((double)pdi->seq_skips /
- (double)(pdi->events + pdi->seq_skips)));
+ pdi->skips, pdi->seq_skips, ratio);
}
}
t = t->next;
}
+
+ /* The time stamp record will usually be the first
+ * record in the trace, but not always.
+ */
+ if (start_timestamp
+ && start_timestamp != genesis_time) {
+ long delta = genesis_time - start_timestamp;
+
+ abs_start_time.tv_sec += SECONDS(delta);
+ abs_start_time.tv_nsec += NANO_SECONDS(delta);
+ if (abs_start_time.tv_nsec < 0) {
+ abs_start_time.tv_nsec += 1000000000;
+ abs_start_time.tv_sec -= 1;
+ } else
+ if (abs_start_time.tv_nsec > 1000000000) {
+ abs_start_time.tv_nsec -= 1000000000;
+ abs_start_time.tv_sec += 1;
+ }
+ }
}
static inline int check_stopwatch(struct blk_io_trace *bit)
* create a map of the cpus we have traces for
*/
cpu_map = malloc(pdi->cpu_map_max / sizeof(long));
+ memset(cpu_map, 0, sizeof(*cpu_map));
n = rb_first(&rb_sort_root);
while (n) {
__t = rb_entry(n, struct trace, rb_node);
break;
}
- if (check_sequence(pdi, t, force))
+ if (!(bit->action == BLK_TN_MESSAGE) &&
+ check_sequence(pdi, t, force))
break;
if (!force && bit->time > last_allowed_time)
if (!pci || pci->cpu != bit->cpu)
pci = get_cpu_info(pdi, bit->cpu);
- pci->last_sequence = bit->sequence;
+ if (!(bit->action == BLK_TN_MESSAGE))
+ pci->last_sequence = bit->sequence;
pci->nelems++;
/*
* not a real trace, so grab and handle it here
*/
- if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
- add_ppm_hash(bit->pid, (char *) bit + sizeof(*bit));
+ if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+ handle_notify(bit);
output_binary(bit, sizeof(*bit) + bit->pdu_len);
continue;
}
return events;
}
-static int do_file(void)
-{
- struct per_cpu_info *pci;
+/*
+ * Managing input streams
+ */
+
+/* One input stream: the traces queued so far for a single device/CPU pair. */
+struct ms_stream {
+ struct ms_stream *next; /* next stream in the ms_head list, ordered by ms_sort() */
+ struct trace *first, *last; /* head and tail of this stream's queued traces */
struct per_dev_info *pdi;
- int i, j, events, events_added;
+ unsigned int cpu; /* together with pdi, selects the per-CPU info via get_cpu_info() */
+};
- /*
- * first prepare all files for reading
- */
- for (i = 0; i < ndevices; i++) {
- pdi = &devices[i];
- pdi->nfiles = 0;
+/*
+ * Fold a device number @d and a CPU number @c into an 8-bit bucket index by
+ * XOR-ing the low bytes of major, minor and cpu.  Uses the macro parameter
+ * @c rather than whatever variable named "cpu" is in scope at the call site.
+ */
+#define MS_HASH(d, c) ((MAJOR(d) & 0xff) ^ (MINOR(d) & 0xff) ^ ((c) & 0xff))
- for (j = 0;; j++) {
- struct stat st;
- int len = 0;
- char *p, *dname;
+struct ms_stream *ms_head;
+struct ms_stream *ms_hash[256];
- pci = get_cpu_info(pdi, j);
- pci->cpu = j;
- pci->fd = -1;
- pci->fdblock = -1;
-
- p = strdup(pdi->name);
- dname = dirname(p);
- if (strcmp(dname, ".")) {
- input_dir = dname;
- p = strdup(pdi->name);
- strcpy(pdi->name, basename(p));
- }
- free(p);
+static void ms_sort(struct ms_stream *msp);
+static int ms_prime(struct ms_stream *msp);
+
+/*
+ * Return the first queued trace of @msp without removing it, or NULL when
+ * @msp itself is NULL.
+ */
+static inline struct trace *ms_peek(struct ms_stream *msp)
+{
+	if (!msp)
+		return NULL;
+
+	return msp->first;
+}
- if (input_dir)
- len = sprintf(pci->fname, "%s/", input_dir);
+/*
+ * Timestamp of the first queued trace of @msp.  The result of ms_peek() is
+ * dereferenced unchecked, so @msp must be non-NULL and non-empty.
+ */
+static inline __u64 ms_peek_time(struct ms_stream *msp)
+{
+	struct trace *head = ms_peek(msp);
+
+	return head->bit->time;
+}
- snprintf(pci->fname + len, sizeof(pci->fname)-1-len,
- "%s.blktrace.%d", pdi->name, pci->cpu);
- if (stat(pci->fname, &st) < 0)
- break;
- if (st.st_size) {
- pci->fd = open(pci->fname, O_RDONLY);
- if (pci->fd < 0) {
- perror(pci->fname);
- continue;
- }
- }
+/*
+ * Re-position @msp after its first trace changed: if its new first trace is
+ * later than that of the following stream, unlink it and insert it again
+ * with ms_sort().
+ *
+ * NOTE(review): unlinking is done by pointing ms_head at msp->next, so this
+ * looks like it expects @msp to be the current list head - confirm against
+ * callers.
+ */
+static inline void ms_resort(struct ms_stream *msp)
+{
+	struct ms_stream *succ = msp->next;
+
+	if (!succ)
+		return;
+	if (ms_peek_time(msp) <= ms_peek_time(succ))
+		return;
+
+	ms_head = succ;
+	msp->next = NULL;
+	ms_sort(msp);
+}
- printf("Input file %s added\n", pci->fname);
- pdi->nfiles++;
- cpu_mark_online(pdi, pci->cpu);
+/*
+ * Drop the first queued trace of @msp.  When that empties the queue, try to
+ * refill it with ms_prime(); if nothing more could be read, the stream is
+ * taken off the list.  Otherwise ms_resort() moves it if it no longer holds
+ * the earliest trace.
+ *
+ * NOTE(review): the unlink assigns ms_head = msp->next, so @msp is presumably
+ * always the list head here - confirm against callers.
+ */
+static inline void ms_deq(struct ms_stream *msp)
+{
+ msp->first = msp->first->next;
+ if (!msp->first) {
+ msp->last = NULL;
+ if (!ms_prime(msp)) { /* zero traces read: stream is exhausted */
+ ms_head = msp->next;
+ msp->next = NULL;
+ return;
}
}
- /*
- * now loop over the files reading in the data
- */
- do {
- unsigned long long youngest;
+ ms_resort(msp);
+}
- events_added = 0;
- last_allowed_time = -1ULL;
- read_sequence++;
+/*
+ * Insert @msp into the ms_head list, keeping the streams ordered by the
+ * timestamp of their first queued trace (earliest first).  @msp must hold at
+ * least one trace because ms_peek_time() dereferences it.  When the list is
+ * empty msp->next is not written, so the caller is expected to have cleared
+ * it beforehand.
+ */
+static void ms_sort(struct ms_stream *msp)
+{
+ __u64 msp_t = ms_peek_time(msp);
+ struct ms_stream *this_msp = ms_head;
- for (i = 0; i < ndevices; i++) {
- pdi = &devices[i];
- pdi->last_read_time = -1ULL;
+ if (this_msp == NULL)
+ ms_head = msp;
+ else if (msp_t < ms_peek_time(this_msp)) { /* earlier than the current head */
+ msp->next = this_msp;
+ ms_head = msp;
+ }
+ else {
+ /* walk to the last stream whose first trace is earlier than ours */
+ while (this_msp->next && ms_peek_time(this_msp->next) < msp_t)
+ this_msp = this_msp->next;
- for (j = 0; j < pdi->nfiles; j++) {
+ msp->next = this_msp->next;
+ this_msp->next = msp;
+ }
+}
- pci = get_cpu_info(pdi, j);
+/*
+ * Read up to rb_batch traces for @msp from its per-CPU input file and append
+ * them to the stream's first/last queue.
+ *
+ * Notify records other than BLK_TN_MESSAGE are consumed right here
+ * (handle_notify() plus binary output) and do not count towards the batch.
+ * Records whose cpu field does not match the file's CPU are reported and
+ * dropped.  On a read, magic, allocation or verification error the CPU is
+ * marked offline and its descriptor is closed.
+ *
+ * Returns the number of traces queued by this call.
+ */
+static int ms_prime(struct ms_stream *msp)
+{
+	__u32 magic;
+	unsigned int i;
+	struct trace *t;
+	struct per_dev_info *pdi = msp->pdi;
+	struct per_cpu_info *pci = get_cpu_info(pdi, msp->cpu);
+	struct blk_io_trace *bit = NULL;
+	int ret, pdu_len, ndone = 0;
- if (pci->fd == -1)
- continue;
+	for (i = 0; !is_done() && pci->fd >= 0 && i < rb_batch; i++) {
+		bit = bit_alloc();
+		ret = read_data(pci->fd, bit, sizeof(*bit), 1, &pci->fdblock);
+		if (ret)
+			goto err;
- pci->smallest_seq_read = -1;
+		if (data_is_native == -1 && check_data_endianness(bit->magic))
+			goto err;
- events = read_events(pci->fd, 1, &pci->fdblock);
- if (events <= 0) {
- cpu_mark_offline(pdi, pci->cpu);
- close(pci->fd);
- pci->fd = -1;
- continue;
- }
+		magic = get_magic(bit);
+		if ((magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
+			fprintf(stderr, "Bad magic %x\n", magic);
+			goto err;
- if (pdi->last_read_time < last_allowed_time)
- last_allowed_time = pdi->last_read_time;
+		}
- events_added += events;
+		pdu_len = get_pdulen(bit);
+		if (pdu_len) {
+			void *ptr = realloc(bit, sizeof(*bit) + pdu_len);
+
+			/* on failure bit is untouched; err releases it */
+			if (!ptr)
+				goto err;
+
+			ret = read_data(pci->fd, (char *) ptr + sizeof(*bit),
+					pdu_len, 1, &pci->fdblock);
+			if (ret) {
+				free(ptr);
+				bit = NULL;
+				goto err;
}
+
+			bit = ptr;
}
- if (sort_entries(&youngest))
- break;
+		trace_to_cpu(bit);
+		if (verify_trace(bit))
+			goto err;
- if (youngest > stopwatch_end)
- break;
+		if (bit->cpu != pci->cpu) {
+			fprintf(stderr, "cpu %d trace info has error cpu %d\n",
+				pci->cpu, bit->cpu);
+			/* the record is dropped, so release it as well */
+			bit_free(bit);
+			bit = NULL;
+			continue;
+		}
- show_entries_rb(0);
+		if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+			handle_notify(bit);
+			output_binary(bit, sizeof(*bit) + bit->pdu_len);
+			bit_free(bit);
+
+			/* undo the loop increment: notifies are not batched */
+			i -= 1;
+			continue;
+		}
- } while (events_added);
+		if (bit->time > pdi->last_read_time)
+			pdi->last_read_time = bit->time;
- if (rb_sort_entries)
- show_entries_rb(1);
+		t = t_alloc();
+		memset(t, 0, sizeof(*t));
+		t->bit = bit;
+
+		if (msp->first == NULL)
+			msp->first = msp->last = t;
+		else {
+			msp->last->next = t;
+			msp->last = t;
+		}
+
+		ndone++;
+	}
+
+	return ndone;
+
+err:
+	if (bit) bit_free(bit);
+
+	cpu_mark_offline(pdi, pci->cpu);
+	close(pci->fd);
+	pci->fd = -1;
+
+	return ndone;
+}
+
+/*
+ * Create the input stream for (@pdi, @cpu), read its first batch with
+ * ms_prime() and, if that produced any trace, link it into the sorted
+ * stream list.  The new stream is returned either way.
+ */
+static struct ms_stream *ms_alloc(struct per_dev_info *pdi, int cpu)
+{
+	struct ms_stream *stream = malloc(sizeof(*stream));
+
+	stream->pdi = pdi;
+	stream->cpu = cpu;
+	stream->first = NULL;
+	stream->last = NULL;
+	stream->next = NULL;
+
+	if (ms_prime(stream) != 0)
+		ms_sort(stream);
+
+	return stream;
+}
+
+/*
+ * Prepare the per-CPU trace file "<name>.blktrace.<cpu>" of @pdi for reading.
+ *
+ * If the device name carries a directory part, that directory becomes
+ * input_dir and the name is reduced to its base name.  The file name is
+ * built in pci->fname with bounded formatting, so an over-long directory is
+ * rejected instead of overflowing the buffer.
+ *
+ * Returns 1 when the file exists (an empty file is accepted but not opened),
+ * and 0 when it is missing, cannot be opened, or its path does not fit.
+ * do_file() stops scanning CPUs at the first 0.
+ */
+static int setup_file(struct per_dev_info *pdi, int cpu)
+{
+	int len = 0;
+	struct stat st;
+	char *p, *dname;
+	struct per_cpu_info *pci = get_cpu_info(pdi, cpu);
+
+	pci->cpu = cpu;
+	pci->fdblock = -1;
+
+	p = strdup(pdi->name);
+	dname = dirname(p);
+	if (strcmp(dname, ".")) {
+		/* dname (inside the first copy) stays alive as input_dir */
+		input_dir = dname;
+		p = strdup(pdi->name);
+		strcpy(pdi->name, basename(p));
+	}
+	free(p);
+
+	if (input_dir) {
+		len = snprintf(pci->fname, sizeof(pci->fname), "%s/",
+			       input_dir);
+		if (len < 0 || (size_t) len >= sizeof(pci->fname)) {
+			fprintf(stderr, "Input path too long: %s/%s\n",
+				input_dir, pdi->name);
+			return 0;
+		}
+	}
+
+	snprintf(pci->fname + len, sizeof(pci->fname) - len,
+		 "%s.blktrace.%d", pdi->name, pci->cpu);
+	if (stat(pci->fname, &st) < 0)
+		return 0;
+	if (!st.st_size)
+		return 1;
+
+	pci->fd = open(pci->fname, O_RDONLY);
+	if (pci->fd < 0) {
+		perror(pci->fname);
+		return 0;
+	}
+
+	printf("Input file %s added\n", pci->fname);
+	cpu_mark_online(pdi, pci->cpu);
+
+	pdi->nfiles++;
+	ms_alloc(pdi, pci->cpu);
+
+	return 1;
+}
+
+/*
+ * Process the first queued trace of @msp: rebase its time on genesis_time,
+ * dump it when it passes the action mask and lies at or after
+ * stopwatch_start, then dequeue it.  With text output the trace is handed to
+ * trace_rb_insert_last(); otherwise it is freed.
+ *
+ * Returns 0 as soon as a trace lies beyond stopwatch_end (the trace is left
+ * queued), 1 otherwise.
+ */
+static int handle(struct ms_stream *msp)
+{
+	struct trace *cur = ms_peek(msp);
+	struct blk_io_trace *bit = cur->bit;
+	struct per_dev_info *pdi = msp->pdi;
+	struct per_cpu_info *pci = get_cpu_info(pdi, msp->cpu);
+
+	pci->nelems++;
+	bit->time -= genesis_time;
+	if (bit->time > stopwatch_end)
+		return 0;
+
+	pdi->last_reported_time = bit->time;
+	if (bit->time >= stopwatch_start &&
+	    (bit->action & (act_mask << BLK_TC_SHIFT)))
+		dump_trace(bit, pci, pdi);
+
+	ms_deq(msp);
+
+	if (!text_output) {
+		bit_free(cur->bit);
+		t_free(cur);
+		return 1;
+	}
+
+	trace_rb_insert_last(pdi, cur);
+	return 1;
+}
+
+/*
+ * Normalise an input name in place.  A plain 'foo' is kept; if a per-CPU
+ * file name such as 'foo.blktrace.X' was given, everything from
+ * ".blktrace." on is cut off so the per-CPU files can be found again.
+ *
+ * Returns 1 when @name is NULL, 0 otherwise.
+ */
+static int name_fixup(char *name)
+{
+	char *suffix;
+
+	if (name == NULL)
+		return 1;
+
+	suffix = strstr(name, ".blktrace.");
+	if (suffix != NULL)
+		suffix[0] = '\0';
return 0;
}
-static int do_stdin(void)
+/*
+ * Parse traces from per-CPU input files.  For every device the name is
+ * sanitised, then setup_file() is called for cpu 0, 1, 2, ... until it
+ * returns 0.  The earliest queued trace supplies genesis_time, after which
+ * the head stream is handled repeatedly until the streams run dry, handle()
+ * returns 0, or is_done() is set.
+ *
+ * Returns 0 on success, non-zero when a name is rejected or a device has no
+ * input files.
+ */
+static int do_file(void)
{
- unsigned long long youngest;
- int fd, events, fdblock;
+ int i, cpu, ret;
+ struct per_dev_info *pdi;
- last_allowed_time = -1ULL;
- fd = dup(STDIN_FILENO);
- if (fd == -1) {
- perror("dup stdin");
- return -1;
+ /*
+ * first prepare all files for reading
+ */
+ for (i = 0; i < ndevices; i++) {
+ pdi = &devices[i];
+ ret = name_fixup(pdi->name);
+ if (ret)
+ return ret;
+
+ /* empty body: setup_file() does the work, cpu counts the calls that succeeded */
+ for (cpu = 0; setup_file(pdi, cpu); cpu++)
+ ;
+
+ if (!cpu) {
+ fprintf(stderr,"No input files found for %s\n",
+ pdi->name);
+ return 1;
+ }
}
+ /*
+ * Get the initial time stamp
+ */
+ if (ms_head)
+ genesis_time = ms_peek_time(ms_head);
+
+ /*
+ * Keep processing traces while any are left
+ */
+ while (!is_done() && ms_head && handle(ms_head))
+ ;
+
+ return 0;
+}
+
+static void do_pipe(int fd)
+{
+ unsigned long long youngest;
+ int events, fdblock;
+
+ last_allowed_time = -1ULL;
fdblock = -1;
while ((events = read_events(fd, 0, &fdblock)) > 0) {
read_sequence++;
if (rb_sort_entries)
show_entries_rb(1);
+}
+
+/*
+ * Read traces from the pipe named by pipename; "-" means a duplicate of
+ * standard input.  The descriptor is handed to do_pipe() and closed
+ * afterwards.
+ *
+ * Returns 0 on success and -1 when the descriptor could not be obtained; the
+ * error message names the operation that actually failed.
+ */
+static int do_fifo(void)
+{
+	int fd;
+
+	if (!strcmp(pipename, "-")) {
+		fd = dup(STDIN_FILENO);
+		if (fd == -1) {
+			perror("dup stdin");
+			return -1;
+		}
+	} else {
+		fd = open(pipename, O_RDONLY);
+		if (fd == -1) {
+			perror(pipename);
+			return -1;
+		}
+	}
+
+	do_pipe(fd);
close(fd);
return 0;
}
return 0;
}
-#define S_OPTS "a:A:b:D:d:f:F:hi:o:Oqstw:vV"
+/*
+ * Tell whether @str names a pipe input: either the literal "-" or an
+ * existing path that stat() reports as a FIFO.  Returns 1 if so, else 0.
+ */
+static int is_pipe(const char *str)
+{
+	struct stat sb;
+
+	if (strcmp(str, "-") == 0)
+		return 1;
+
+	if (stat(str, &sb) != 0)
+		return 0;
+
+	return S_ISFIFO(sb.st_mode) ? 1 : 0;
+}
+
+#define S_OPTS "a:A:b:D:d:f:F:hi:o:Oqstw:vVM"
+/* Option synopsis and per-option help text printed by usage(). */
static char usage_str[] = "\n\n" \
"-i <file> | --input=<file>\n" \
"[ -a <action field> | --act-mask=<action field> ]\n" \
"[ -s | --per-program-stats ]\n" \
"[ -t | --track-ios ]\n" \
"[ -w <time> | --stopwatch=<time> ]\n" \
+ "[ -M | --no-msgs ]\n" \
"[ -v | --verbose ]\n" \
"[ -V | --version ]\n\n" \
+ "\t-a Only trace specified actions. See documentation\n" \
+ "\t-A Give trace mask as a single value. See documentation\n" \
"\t-b stdin read batching\n" \
"\t-d Output file. If specified, binary data is written to file\n" \
"\t-D Directory to prepend to input file names\n" \
"\t to get queued, to get dispatched, and to get completed\n" \
"\t-w Only parse data between the given time interval in seconds.\n" \
"\t If 'start' isn't given, blkparse defaults the start time to 0\n" \
+ "\t-M Do not output messages to binary file\n" \
"\t-v More verbose for marginal errors\n" \
"\t-V Print program version info\n\n";
+/* Print "Usage: <prog> " followed by usage_str to stderr. */
static void usage(char *prog)
{
- fprintf(stderr, "Usage: %s %s %s", prog, blkparse_version, usage_str);
+ fprintf(stderr, "Usage: %s %s", prog, usage_str);
}
int main(int argc, char *argv[])
act_mask_tmp = i;
break;
case 'i':
- if (!strcmp(optarg, "-") && !pipeline)
+ if (is_pipe(optarg) && !pipeline) {
pipeline = 1;
- else if (resize_devices(optarg) != 0)
+ pipename = strdup(optarg);
+ } else if (resize_devices(optarg) != 0)
return 1;
break;
case 'D':
case 'd':
dump_binary = optarg;
break;
+ case 'M':
+ bin_output_msgs = 0;
+ break;
default:
usage(argv[0]);
return 1;
}
while (optind < argc) {
- if (!strcmp(argv[optind], "-") && !pipeline)
+ if (is_pipe(argv[optind]) && !pipeline) {
pipeline = 1;
- else if (resize_devices(argv[optind]) != 0)
+ pipename = strdup(argv[optind]);
+ } else if (resize_devices(argv[optind]) != 0)
return 1;
optind++;
}
ofp = fdopen(STDOUT_FILENO, "w");
mode = _IOLBF;
} else {
- char ofname[128];
+ char ofname[PATH_MAX];
snprintf(ofname, sizeof(ofname) - 1, "%s", output_name);
ofp = fopen(ofname, "w");
}
if (dump_binary) {
- dump_fp = fopen(dump_binary, "w");
- if (!dump_fp) {
- perror(dump_binary);
- dump_binary = NULL;
- return 1;
+ if (!strcmp(dump_binary, "-"))
+ dump_fp = stdout;
+ else {
+ dump_fp = fopen(dump_binary, "w");
+ if (!dump_fp) {
+ perror(dump_binary);
+ dump_binary = NULL;
+ return 1;
+ }
}
bin_ofp_buffer = malloc(128 * 1024);
if (setvbuf(dump_fp, bin_ofp_buffer, _IOFBF, 128 * 1024)) {
}
if (pipeline)
- ret = do_stdin();
+ ret = do_fifo();
else
ret = do_file();
- show_stats();
- if (ofp_buffer)
+ if (!ret)
+ show_stats();
+
+ if (have_drv_data && !dump_binary)
+ printf("\ndiscarded traces containing low-level device driver "
+ "specific data (only available in binary output)\n");
+
+ if (ofp_buffer) {
+ fflush(ofp);
free(ofp_buffer);
- if (bin_ofp_buffer)
+ }
+ if (bin_ofp_buffer) {
+ fflush(dump_fp);
free(bin_ofp_buffer);
+ }
return ret;
}