*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Parts of this file were imported from Jens Axboe's blktrace sources (also GPL)
*/
#include <sys/mman.h>
#include <time.h>
#include <math.h>
+#include <dirent.h>
#include "plot.h"
#include "blkparse.h"
static struct list_head io_hash_table[IO_HASH_TABLE_SIZE];
static u64 ios_in_flight = 0;
+/* hash of processes seen in the trace, keyed by pid */
+#define PROCESS_HASH_TABLE_BITS 7
+#define PROCESS_HASH_TABLE_SIZE (1 << PROCESS_HASH_TABLE_BITS)
+static struct list_head process_hash_table[PROCESS_HASH_TABLE_SIZE];
+
+/* command line flags defined in the main program */
+extern int plot_io_action;
+extern int io_per_process;
+
+/* scratch buffer for building command lines and file names */
+static char line[1024];
+/* derive the length from the buffer so the two can never drift apart */
+static const int line_len = sizeof(line);
/*
* Trace categories
/* sector offset of this IO */
u64 sector;
+ /* dev_t for this IO */
+ u32 device;
+
/* time this IO was dispatched */
u64 dispatch_time;
/* time this IO was finished */
u64 completion_time;
struct list_head hash_list;
+ /* process which queued this IO */
+ u32 pid;
+};
+
+/* per-process bookkeeping: maps a pid to its plot slot and name */
+struct pid_map {
+	struct list_head hash_list;
+	u32 pid;
+	/* slot in the per-trace-file gdd_reads/gdd_writes arrays */
+	int index;
+	/* C99 flexible array member instead of the GNU char name[0] extension */
+	char name[];
+};
#define MINORBITS 20
return hash >> (64 - IO_HASH_TABLE_BITS);
}
-static int hash_table_insert(struct pending_io *ins_pio)
+/*
+ * Insert ins_pio into the global IO hash, keyed on (sector, device).
+ * Returns 0 on success or -EEXIST if a pending IO with the same
+ * sector and device is already hashed.
+ */
+static int io_hash_table_insert(struct pending_io *ins_pio)
{
	u64 sector = ins_pio->sector;
+	u32 dev = ins_pio->device;
	int slot = hash_sector(sector);
	struct list_head *head;
	struct pending_io *pio;
	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
+		/* match on device too: different disks can share sector numbers */
-		if (pio->sector == sector)
+		if (pio->sector == sector && pio->device == dev)
			return -EEXIST;
	}
	list_add_tail(&ins_pio->hash_list, head);
	return 0;
}
-static struct pending_io *hash_table_search(u64 sector)
+/*
+ * Look up a pending IO by (sector, device).  Returns the pending_io
+ * still linked into the hash, or NULL when none is found.
+ */
+static struct pending_io *io_hash_table_search(u64 sector, u32 dev)
{
	int slot = hash_sector(sector);
	struct list_head *head;
	head = io_hash_table + slot;
	list_for_each_entry(pio, head, hash_list) {
-		if (pio->sector == sector)
+		if (pio->sector == sector && pio->device == dev)
			return pio;
	}
	return NULL;
}
-static int hash_dispatched_io(struct blk_io_trace *io)
+/*
+ * Record a queued IO in the hash so later dispatch/completion events
+ * can find it.  Returns the new pending_io, or NULL if an entry for
+ * this (sector, device) already exists.
+ * NOTE(review): calloc() result is dereferenced unchecked — confirm
+ * whether an abort-on-OOM policy is intended here.
+ */
+static struct pending_io *hash_queued_io(struct blk_io_trace *io)
{
	struct pending_io *pio;
	int ret;
	pio = calloc(1, sizeof(*pio));
	pio->sector = io->sector;
-	pio->dispatch_time = io->time;
+	pio->device = io->device;
+	pio->pid = io->pid;
-	ret = hash_table_insert(pio);
-	if (ret == -EEXIST) {
-		/* crud, the IO isn't here */
+	ret = io_hash_table_insert(pio);
+	if (ret < 0) {
+		/* crud, the IO is there already */
		free(pio);
+		return NULL;
+	}
+	return pio;
+}
+
+/*
+ * Find (or create) the pending_io for a dispatched IO and stamp its
+ * dispatch time.  Returns NULL only when the IO was not hashed and
+ * could not be inserted (duplicate (sector, device)).
+ */
+static struct pending_io *hash_dispatched_io(struct blk_io_trace *io)
+{
+	struct pending_io *pio;
+
+	pio = io_hash_table_search(io->sector, io->device);
+	if (!pio) {
+		/* dispatch without a prior queue event: hash it now */
+		pio = hash_queued_io(io);
+		if (!pio)
+			return NULL;
	}
-	return ret;
+	pio->dispatch_time = io->time;
+	return pio;
}
+/*
+ * Look up the pending_io a completion event refers to; NULL when the
+ * IO was never seen queued or dispatched.
+ */
static struct pending_io *hash_completed_io(struct blk_io_trace *io)
{
	struct pending_io *pio;
-	pio = hash_table_search(io->sector);
+	pio = io_hash_table_search(io->sector, io->device);
	if (!pio)
		return NULL;
	return pio;
}
+/*
+ * Start every bucket of the process hash out as an empty list.
+ * Must run before any process_hash_insert/search call.
+ */
+void init_process_hash_table(void)
+{
+	struct list_head *head = process_hash_table;
+	struct list_head *end = process_hash_table + PROCESS_HASH_TABLE_SIZE;
+
+	while (head < end)
+		INIT_LIST_HEAD(head++);
+}
+
+/*
+ * Fold a pid down to a process hash bucket index.
+ *
+ * The original XORed (pid >> 3) in twice; XOR is its own inverse, so
+ * the pair cancelled out and that shift contributed nothing to the
+ * hash.  Apply each shift exactly once so all of them take effect.
+ */
+static u32 hash_pid(u32 pid)
+{
+	u32 hash = pid;
+
+	hash ^= pid >> 3;
+	hash ^= pid >> 4;
+	hash ^= pid >> 6;
+	return (hash & (PROCESS_HASH_TABLE_SIZE - 1));
+}
+
+/*
+ * Return the pid_map entry for 'pid', or NULL when the pid has not
+ * been hashed yet.
+ */
+static struct pid_map *process_hash_search(u32 pid)
+{
+	struct list_head *bucket = process_hash_table + hash_pid(pid);
+	struct pid_map *entry;
+
+	list_for_each_entry(entry, bucket, hash_list)
+		if (entry->pid == pid)
+			return entry;
+	return NULL;
+}
+
+/*
+ * Insert or update the name mapping for 'pid'.  An existing entry is
+ * returned as-is when 'name' is NULL or unchanged; otherwise it is
+ * reallocated with the new name while keeping its plot index.  A NULL
+ * 'name' synthesizes "[pid]".  Exits on memory allocation failure.
+ */
+static struct pid_map *process_hash_insert(u32 pid, char *name)
+{
+	int slot = hash_pid(pid);
+	struct pid_map *pm;
+	int old_index = 0;
+	char buf[16];
+
+	pm = process_hash_search(pid);
+	if (pm) {
+		/* Entry exists and name shouldn't be changed? */
+		if (!name || !strcmp(name, pm->name))
+			return pm;
+		list_del(&pm->hash_list);
+		old_index = pm->index;
+		free(pm);
+	}
+	if (!name) {
+		/* "[4294967295]" worst case is 13 bytes, buf holds it */
+		sprintf(buf, "[%u]", pid);
+		name = buf;
+	}
+	pm = malloc(sizeof(struct pid_map) + strlen(name) + 1);
+	if (!pm) {
+		/* same OOM policy as match_trace() */
+		perror("memory allocation failed");
+		exit(1);
+	}
+	pm->pid = pid;
+	pm->index = old_index;
+	strcpy(pm->name, name);
+	list_add_tail(&pm->hash_list, process_hash_table + slot);
+
+	return pm;
+}
+
static void handle_notify(struct trace *trace)
{
struct blk_io_trace *io = trace->io;
void *payload = (char *)io + sizeof(*io);
u32 two32[2];
+ if (io->action == BLK_TN_PROCESS) {
+ if (io_per_process)
+ process_hash_insert(io->pid, payload);
+ return;
+ }
if (io->action != BLK_TN_TIMESTAMP)
return;
return -1;
}
+/*
+ * Find the dev_info slot for io->device, creating one on first sight.
+ * Exits if the trace touches more than MAX_DEVICES_PER_TRACE devices.
+ */
+static struct dev_info *lookup_dev(struct trace *trace, struct blk_io_trace *io)
+{
+	u32 dev = io->device;
+	int i;
+	struct dev_info *di = NULL;
+
+	for (i = 0; i < trace->num_devices; i++) {
+		if (trace->devices[i].device == dev) {
+			di = trace->devices + i;
+			goto found;
+		}
+	}
+	/* not seen before: claim the next slot */
+	i = trace->num_devices++;
+	if (i >= MAX_DEVICES_PER_TRACE) {
+		fprintf(stderr, "Trace contains too many devices (%d)\n", i);
+		exit(1);
+	}
+	di = trace->devices + i;
+	di->device = dev;
+found:
+	return di;
+}
+
+/*
+ * Scan the whole trace once to find, per device, the lowest and
+ * highest byte offsets touched, then assign each device a contiguous
+ * window in one flattened offset space (di->map is the window start).
+ * map_io() later uses these windows so several devices can share one
+ * vertical axis.
+ * NOTE(review): di->min is never seeded to a large value before the
+ * "found < di->min" test, so it can only ever stay at its initial
+ * value — confirm dev_info zero-init makes min==0 intentional.
+ */
+static void map_devices(struct trace *trace)
+{
+	struct dev_info *di;
+	u64 found;
+	u64 map_start = 0;
+	int i;
+
+	first_record(trace);
+	while (1) {
+		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
+			di = lookup_dev(trace, trace->io);
+			/* sector to byte offset */
+			found = trace->io->sector << 9;
+			if (found < di->min)
+				di->min = found;
+
+			found += trace->io->bytes;
+			if (di->max < found)
+				di->max = found;
+		}
+		if (next_record(trace))
+			break;
+	}
+	first_record(trace);
+	/* lay the per-device windows out back to back */
+	for (i = 0; i < trace->num_devices; i++) {
+		di = trace->devices + i;
+		di->map = map_start;
+		map_start += di->max - di->min;
+	}
+}
+
+/*
+ * Translate an IO's byte offset into the flattened multi-device
+ * offset space built by map_devices().
+ */
+u64 map_io(struct trace *trace, struct blk_io_trace *io)
+{
+	struct dev_info *di = lookup_dev(trace, io);
+	/*
+	 * Use the io argument, not trace->io: the original read
+	 * trace->io->sector while taking the device from io, silently
+	 * ignoring the parameter (callers happened to pass trace->io).
+	 */
+	u64 val = io->sector << 9;
+	return di->map + val - di->min;
+}
+
void find_extreme_offsets(struct trace *trace, u64 *min_ret, u64 *max_ret, u64 *max_bank_ret,
		     u64 *max_offset_ret)
{
	u64 max_bank = 0;
	u64 max_bank_offset = 0;
	u64 num_banks = 0;
+
+	/* build the per-device offset windows before mapping any IO */
+	map_devices(trace);
+
	first_record(trace);
	while (1) {
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-			found = trace->io->sector << 9;
+			/* offsets are now in the flattened multi-device space */
+			found = map_io(trace, trace->io);
			if (found < min)
				min = found;
			*max_offset_ret = max_bank_offset;
}
+/*
+ * Remember which trace event types (queue / issue / complete) exist,
+ * so later plotting can pick the best one available.
+ */
+static void check_io_types(struct trace *trace)
+{
+	struct blk_io_trace *io = trace->io;
+	int action = io->action & BLK_TA_MASK;
+
+	/* notify records carry no queue/issue/complete information */
+	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+		return;
+
+	if (action == __BLK_TA_COMPLETE)
+		trace->found_completion = 1;
+	else if (action == __BLK_TA_ISSUE)
+		trace->found_issue = 1;
+	else if (action == __BLK_TA_QUEUE)
+		trace->found_queue = 1;
+}
+
+
+
+
int filter_outliers(struct trace *trace, u64 min_offset, u64 max_offset,
		    u64 *yzoom_min, u64 *yzoom_max)
{
	memset(min_per_bucket, 0xff, sizeof(u64) * 11);
	first_record(trace);
	while (1) {
+		/* note which event types exist while we scan anyway */
+		check_io_types(trace);
		if (!(trace->io->action & BLK_TC_ACT(BLK_TC_NOTIFY)) &&
		    (trace->io->action & BLK_TA_MASK) == __BLK_TA_QUEUE) {
-			u64 off = (trace->io->sector << 9) - min_offset;
+			/* map into the flattened multi-device offset space */
+			u64 off = map_io(trace, trace->io) - min_offset;
			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off < min_per_bucket[slot])
				min_per_bucket[slot] = off;
-		off += trace->io->bytes;
+			off += trace->io->bytes;
			slot = (int)(off / bytes_per_bucket);
			hits[slot]++;
			if (off > max_per_bucket[slot])
	return 0;
}
+/* every per-CPU blktrace output file ends with this suffix */
+static char footer[] = ".blktrace.0";
+/* length of the suffix itself, without the terminating NUL */
+static int footer_len = sizeof(footer) - 1;
+
+/*
+ * If 'name' looks like a blktrace file (<base>.blktrace.0), append
+ * "-i '<base>'" to the *traces command-line string so blkparse will
+ * pick it up.  Exits on memory allocation failure.
+ *
+ * The original counted the NUL into footer_len and compensated with
+ * a "+ 1" offset; that also made it reject single-character device
+ * names such as "a.blktrace.0".  Using the string length proper fixes
+ * both.
+ */
+static void match_trace(char *name, char **traces)
+{
+	int match_len;
+	char *match;
+	int footer_start;
+
+	match_len = strlen(name);
+	/* need at least one character of base name before the suffix */
+	if (match_len <= footer_len)
+		return;
+
+	footer_start = match_len - footer_len;
+	if (strcmp(name + footer_start, footer) != 0)
+		return;
+
+	match = strdup(name);
+	if (!match)
+		goto enomem;
+
+	/* chop the suffix off, leaving just the base trace name */
+	match[footer_start] = '\0';
+	snprintf(line, line_len, "%s -i '%s'", *traces ? *traces : "", match);
+	free(match);
+
+	match = strdup(line);
+	if (!match)
+		goto enomem;
+
+	free(*traces);
+	*traces = match;
+	return;
+
+enomem:
+	perror("memory allocation failed");
+	exit(1);
+}
+
+/*
+ * Given a directory of blktrace output, build one combined dump file
+ * by running blkparse over every per-device trace found inside.
+ * Returns a malloc'ed path to the "<dir>.dump" file, or NULL if the
+ * directory could not be read or contained no traces.
+ * NOTE(review): dir_name and matched file names are interpolated into
+ * a shell command for system(); quoting is single quotes only, so
+ * hostile names could inject shell — confirm input is trusted CLI use.
+ */
+static char *combine_blktrace_devs(char *dir_name)
+{
+	DIR *dir;
+	char *traces = NULL;
+	struct dirent *d;
+	int len;
+	int ret;
+
+	dir = opendir(dir_name);
+	if (!dir)
+		return NULL;
+
+	while (1) {
+		d = readdir(dir);
+		if (!d)
+			break;
+
+		/* quick length gate; match_trace() does the real check */
+		len = strlen(d->d_name);
+		if (len > footer_len)
+			match_trace(d->d_name, &traces);
+	}
+
+	closedir(dir);
+
+	if (!traces)
+		return NULL;
+
+	snprintf(line, line_len, "blkparse -O %s -D %s -d '%s.%s'",
+		 traces, dir_name, dir_name, "dump");
+
+	ret = system(line);
+	if (ret) {
+		fprintf(stderr, "blkparse failure %s\n", line);
+		exit(1);
+	}
+	snprintf(line, line_len, "%s.%s", dir_name, "dump");
+	return strdup(line);
+}
+
static char *find_trace_file(char *filename)
{
int ret;
struct stat st;
- char line[1024];
char *dot;
char *try;
+ int found_dir = 0;
+ /* look for an exact match of whatever they pass in.
+ * If it is a file, assume it is the dump file.
+ * If a directory, remember that it existed so we
+ * can combine traces in that directory later
+ */
ret = stat(filename, &st);
- if (ret == 0)
- return strdup(filename);
+ if (ret == 0) {
+ if (S_ISREG(st.st_mode))
+ return strdup(filename);
+
+ if (S_ISDIR(st.st_mode))
+ found_dir = 1;
+ }
- snprintf(line, 1024, "%s.%s", filename, "dump");
+ /*
+ * try tacking .dump onto the end and see if that already
+ * has been generated
+ */
+ snprintf(line, line_len, "%s.%s", filename, "dump");
ret = stat(line, &st);
if (ret == 0)
return strdup(line);
+ /*
+ * try to generate the .dump from all the traces in
+ * a single dir.
+ */
+ if (found_dir) {
+ try = combine_blktrace_devs(filename);
+ if (try)
+ return try;
+ }
+
+ /*
+ * try to generate the .dump from all the blktrace
+ * files for a named trace
+ */
try = strdup(filename);
dot = strrchr(try, '.');
if (!dot || strcmp(".dump", dot) != 0) {
- if (dot)
+ if (dot && dot != try)
*dot = '\0';
- snprintf(line, 1024, "%s%s", try, ".blktrace.0");
+ snprintf(line, line_len, "%s%s", try, ".blktrace.0");
ret = stat(line, &st);
if (ret == 0) {
blktrace_to_dump(try);
- snprintf(line, 1024, "%s.%s", try, "dump");
+ snprintf(line, line_len, "%s.%s", try, "dump");
ret = stat(line, &st);
if (ret == 0) {
free(try);
return __BLK_TA_COMPLETE;
}
+/*
+ * Map a blkparse action letter (Q/D/C) to its __BLK_TA_* value,
+ * or -1 for anything unrecognized.
+ */
+int action_char_to_num(char action)
+{
+	if (action == 'Q')
+		return __BLK_TA_QUEUE;
+	if (action == 'D')
+		return __BLK_TA_ISSUE;
+	if (action == 'C')
+		return __BLK_TA_COMPLETE;
+	return -1;
+}
+
+/*
+ * Pick which blktrace action to plot: an explicit user choice
+ * (plot_io_action) wins, otherwise prefer queue events when present.
+ * NOTE(review): context appears elided in this hunk — confirm the
+ * found_issue branch and trailing return against the full file.
+ */
static inline int io_event(struct trace *trace)
{
+	if (plot_io_action)
+		return plot_io_action;
	if (trace->found_queue)
		return __BLK_TA_QUEUE;
	if (trace->found_issue)
		return __BLK_TA_COMPLETE;
}
-void add_tput(struct trace *trace, struct graph_line_data *gld)
+/*
+ * Accumulate throughput (bytes per one-second bucket) for the current
+ * record into the read or write graph as appropriate.  Records past
+ * gld->max_seconds are now skipped silently instead of aborting.
+ */
+void add_tput(struct trace *trace, struct graph_line_data *writes_gld,
+	      struct graph_line_data *reads_gld)
{
	struct blk_io_trace *io = trace->io;
+	struct graph_line_data *gld;
	int action = io->action & BLK_TA_MASK;
	int seconds;
	if (action != tput_event(trace))
		return;
+	/* anything not flagged as a read lands in the write graph */
+	if (BLK_DATADIR(io->action) & BLK_TC_READ)
+		gld = reads_gld;
+	else
+		gld = writes_gld;
+
	seconds = SECONDS(io->time);
-	if (seconds > gld->max_seconds) {
-		fprintf(stderr, "Bad record %d %d %d\n", seconds, gld->max_seconds, action);
-		abort();
-	}
+	if (seconds > gld->max_seconds)
+		return;
	gld->data[seconds].sum += io->bytes;
+
	gld->data[seconds].count = 1;
	if (gld->data[seconds].sum > gld->max)
		gld->max = gld->data[seconds].sum;
}
-void add_io(struct trace *trace, struct graph_dot_data *gdd_writes,
-	    struct graph_dot_data *gdd_reads)
+/* grow the per-process plot pointer arrays this many slots at a time */
+#define GDD_PTR_ALLOC_STEP 16
+
+/*
+ * Return the pid_map entry that owns this pid's per-process plots,
+ * growing the trace_file's gdd_reads/gdd_writes pointer arrays when a
+ * new process shows up.  Returns NULL when per-process plotting is
+ * off (all IO then draws into plot slot 0).
+ * NOTE(review): "!pm->index" is treated as "new entry", yet the first
+ * process is handed index 0 by io_plots++ and would look new again on
+ * its next IO — confirm slot 0 is reserved somewhere else.
+ */
+static struct pid_map *get_pid_map(struct trace_file *tf, u32 pid)
+{
+	struct pid_map *pm;
+
+	if (!io_per_process) {
+		if (!tf->io_plots)
+			tf->io_plots = 1;
+		return NULL;
+	}
+
+	pm = process_hash_insert(pid, NULL);
+	/* New entry? */
+	if (!pm->index) {
+		if (tf->io_plots == tf->io_plots_allocated) {
+			/* grow both arrays and zero the fresh tail slots */
+			tf->io_plots_allocated += GDD_PTR_ALLOC_STEP;
+			tf->gdd_reads = realloc(tf->gdd_reads, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
+			if (!tf->gdd_reads)
+				abort();
+			tf->gdd_writes = realloc(tf->gdd_writes, tf->io_plots_allocated * sizeof(struct graph_dot_data *));
+			if (!tf->gdd_writes)
+				abort();
+			memset(tf->gdd_reads + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
+			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
+			memset(tf->gdd_writes + tf->io_plots_allocated - GDD_PTR_ALLOC_STEP,
+			       0, GDD_PTR_ALLOC_STEP * sizeof(struct graph_dot_data *));
+		}
+		pm->index = tf->io_plots++;
+
+		return pm;
+	}
+	return pm;
+}
+
+/*
+ * Plot one IO into the right per-process dot graph (reads or writes),
+ * allocating the graph lazily the first time a plot slot is used.
+ */
+void add_io(struct trace *trace, struct trace_file *tf)
{
	struct blk_io_trace *io = trace->io;
	int action = io->action & BLK_TA_MASK;
	u64 offset;
+	int index;
+	char *label;
+	struct pid_map *pm;
	if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return;
	if (action != io_event(trace))
		return;
-	offset = io->sector << 9;
+	/* flattened multi-device byte offset */
+	offset = map_io(trace, io);
-	if (BLK_DATADIR(io->action) & BLK_TC_READ)
-		set_gdd_bit(gdd_reads, offset, io->bytes, io->time);
-	else if (BLK_DATADIR(io->action) & BLK_TC_WRITE)
-		set_gdd_bit(gdd_writes, offset, io->bytes, io->time);
+	pm = get_pid_map(tf, io->pid);
+	if (!pm) {
+		/* per-process plotting disabled: everything in slot 0 */
+		index = 0;
+		label = "";
+	} else {
+		index = pm->index;
+		label = pm->name;
+	}
+	if (BLK_DATADIR(io->action) & BLK_TC_READ) {
+		if (!tf->gdd_reads[index])
+			tf->gdd_reads[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
+		set_gdd_bit(tf->gdd_reads[index], offset, io->bytes, io->time);
+	} else if (BLK_DATADIR(io->action) & BLK_TC_WRITE) {
+		if (!tf->gdd_writes[index])
+			tf->gdd_writes[index] = alloc_dot_data(tf->min_seconds, tf->max_seconds, tf->min_offset, tf->max_offset, tf->stop_seconds, pick_color(), strdup(label));
+		set_gdd_bit(tf->gdd_writes[index], offset, io->bytes, io->time);
+	}
}
void add_pending_io(struct trace *trace, struct graph_line_data *gld)
{
- int ret;
int seconds;
struct blk_io_trace *io = trace->io;
int action = io->action & BLK_TA_MASK;
double avg;
+ struct pending_io *pio;
if (io->action & BLK_TC_ACT(BLK_TC_NOTIFY))
return;
+ if (action == __BLK_TA_QUEUE) {
+ if (trace->found_issue || trace->found_completion)
+ hash_queued_io(trace->io);
+ return;
+ }
if (action != __BLK_TA_ISSUE)
return;
seconds = SECONDS(io->time);
- if (seconds > gld->max_seconds) {
- fprintf(stderr, "Bad record %d %d\n", seconds, gld->max_seconds);
- abort();
- }
+ if (seconds > gld->max_seconds)
+ return;
- ret = hash_dispatched_io(trace->io);
- if (ret)
+ pio = hash_dispatched_io(trace->io);
+ if (!pio)
return;
+ if (!trace->found_completion) {
+ list_del(&pio->hash_list);
+ free(pio);
+ }
+
ios_in_flight++;
gld->data[seconds].sum += ios_in_flight;
return;
seconds = SECONDS(io->time);
- if (seconds > gld->max_seconds) {
- fprintf(stderr, "Bad record %d %d\n", seconds, gld->max_seconds);
- abort();
- }
+ if (seconds > gld->max_seconds)
+ return;
gld->data[seconds].sum += 1;
gld->data[seconds].count = 1;
void check_record(struct trace *trace)
{
-	struct blk_io_trace *io = trace->io;
-	int action = io->action & BLK_TA_MASK;
-
-	if (!(io->action & BLK_TC_ACT(BLK_TC_NOTIFY))) {
-		switch (action) {
-		case __BLK_TA_COMPLETE:
-			trace->found_completion = 1;
-			break;
-		case __BLK_TA_ISSUE:
-			trace->found_issue = 1;
-			break;
-		case __BLK_TA_QUEUE:
-			trace->found_queue = 1;
-			break;
-		};
-	}
+	/* event-type bookkeeping moved into check_io_types() */
	handle_notify(trace);
}