X-Git-Url: https://git.kernel.dk/?p=fio.git;a=blobdiff_plain;f=blktrace.c;h=58f28ceb664db93a4ca0e97f0405a066929d3b38;hp=988ce5917cd564f13aadd00402501e7f6713385a;hb=bb446c117849b39ece795602774c3716eef6064a;hpb=fdefd987d7ba284c7a9e101911c4b8e72fe326ad diff --git a/blktrace.c b/blktrace.c index 988ce591..58f28ceb 100644 --- a/blktrace.c +++ b/blktrace.c @@ -3,31 +3,80 @@ */ #include #include +#include +#include + #include "list.h" #include "fio.h" #include "blktrace_api.h" -static int discard_pdu(int fd, struct blk_io_trace *t) +#define TRACE_FIFO_SIZE 65536 + +/* + * fifo refill frontend, to avoid reading data in trace sized bites + */ +static int refill_fifo(struct thread_data *td, struct fifo *fifo, int fd) +{ + char buf[TRACE_FIFO_SIZE]; + unsigned int total; + int ret; + + total = sizeof(buf); + if (total > fifo_room(fifo)) + total = fifo_room(fifo); + + ret = read(fd, buf, total); + if (ret < 0) { + td_verror(td, errno, "read blktrace file"); + return -1; + } + + if (ret > 0) + ret = fifo_put(fifo, buf, ret); + + return ret; +} + +/* + * Retrieve 'len' bytes from the fifo, refilling if necessary. + */ +static int trace_fifo_get(struct thread_data *td, struct fifo *fifo, int fd, + void *buf, unsigned int len) +{ + if (fifo_len(fifo) < len) { + int ret = refill_fifo(td, fifo, fd); + + if (ret < 0) + return ret; + } + + return fifo_get(fifo, buf, len); +} + +/* + * Just discard the pdu by seeking past it. + */ +static int discard_pdu(struct thread_data *td, struct fifo *fifo, int fd, + struct blk_io_trace *t) { if (t->pdu_len == 0) return 0; - if (lseek(fd, t->pdu_len, SEEK_CUR) < 0) - return errno; - - return 0; + return trace_fifo_get(td, fifo, fd, NULL, t->pdu_len); } +/* + * Check if this is a blktrace binary data file. We read a single trace + * into memory and check for the magic signature. + */ int is_blktrace(const char *filename) { struct blk_io_trace t; int fd, ret; fd = open(filename, O_RDONLY); - if (fd < 0) { - perror("open blktrace"); + if (fd < 0) return 0; - } ret = read(fd, &t, sizeof(t)); close(fd); @@ -46,15 +95,99 @@ int is_blktrace(const char *filename) return 0; } +static int lookup_device(char *path, unsigned int maj, unsigned int min) +{ + struct dirent *dir; + struct stat st; + int found = 0; + DIR *D; + + D = opendir(path); + if (!D) + return 0; + + while ((dir = readdir(D)) != NULL) { + char full_path[256]; + + if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) + continue; + + sprintf(full_path, "%s/%s", path, dir->d_name); + if (lstat(full_path, &st) == -1) { + perror("lstat"); + break; + } + + if (S_ISDIR(st.st_mode)) { + found = lookup_device(full_path, maj, min); + if (found) { + strcpy(path, full_path); + break; + } + } + + if (!S_ISBLK(st.st_mode)) + continue; + + if (maj == major(st.st_rdev) && min == minor(st.st_rdev)) { + strcpy(path, full_path); + found = 1; + break; + } + } + + closedir(D); + return found; +} + +#define FMINORBITS 20 +#define FMINORMASK ((1U << FMINORBITS) - 1) +#define FMAJOR(dev) ((unsigned int) ((dev) >> FMINORBITS)) +#define FMINOR(dev) ((unsigned int) ((dev) & FMINORMASK)) + +static void trace_add_file(struct thread_data *td, __u32 device) +{ + static unsigned int last_maj, last_min; + unsigned int maj = FMAJOR(device); + unsigned int min = FMINOR(device); + struct fio_file *f; + char dev[256]; + unsigned int i; + + if (last_maj == maj && last_min == min) + return; + + last_maj = maj; + last_min = min; + + /* + * check for this file in our list + */ + for_each_file(td, f, i) + if (f->major == maj && f->minor == min) + return; + + strcpy(dev, "/dev"); + if (lookup_device(dev, maj, min)) + add_file(td, dev); +} + +/* + * Store blk_io_trace data in an ipo for later retrieval. + */ static void store_ipo(struct thread_data *td, unsigned long long offset, - unsigned int bytes, int rw) + unsigned int bytes, int rw, unsigned long long ttime) { struct io_piece *ipo = malloc(sizeof(*ipo)); memset(ipo, 0, sizeof(*ipo)); INIT_LIST_HEAD(&ipo->list); - ipo->offset = offset; + /* + * the 512 is wrong here, it should be the hardware sector size... + */ + ipo->offset = offset * 512; ipo->len = bytes; + ipo->delay = ttime / 1000; if (rw) ipo->ddir = DDIR_WRITE; else @@ -63,20 +196,47 @@ static void store_ipo(struct thread_data *td, unsigned long long offset, list_add_tail(&ipo->list, &td->io_log_list); } -static void handle_trace(struct thread_data *td, struct blk_io_trace *t) +/* + * We only care for queue traces, most of the others are side effects + * due to internal workings of the block layer. + */ +static void handle_trace(struct thread_data *td, struct blk_io_trace *t, + unsigned long long ttime, unsigned long *ios, + unsigned int *bs) { int rw; if ((t->action & 0xffff) != __BLK_TA_QUEUE) return; + if (t->action & BLK_TC_ACT(BLK_TC_PC)) + return; + if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY)) + return; + + trace_add_file(td, t->device); + + rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; + + if (t->bytes > bs[rw]) + bs[rw] = t->bytes; - w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0; - store_ipo(td, t->sector, t->bytes, rw); + ios[rw]++; + td->o.size += t->bytes; + store_ipo(td, t->sector, t->bytes, rw, ttime); } +/* + * Load a blktrace file by reading all the blk_io_trace entries, and storing + * them as io_pieces like the fio text version would do. + */ int load_blktrace(struct thread_data *td, const char *filename) { + unsigned long long ttime, delay; struct blk_io_trace t; + unsigned long ios[2]; + unsigned int cpu; + unsigned int rw_bs[2]; + struct fifo *fifo; int fd; fd = open(filename, O_RDONLY); @@ -85,35 +245,83 @@ int load_blktrace(struct thread_data *td, const char *filename) return 1; } + fifo = fifo_alloc(TRACE_FIFO_SIZE); + + td->o.size = 0; + + cpu = 0; + ttime = 0; + ios[0] = ios[1] = 0; + rw_bs[0] = rw_bs[1] = 0; do { - int ret = read(fd, &t, sizeof(t)); + int ret = trace_fifo_get(td, fifo, fd, &t, sizeof(t)); - if (ret < 0) { - td_verror(td, errno, "read blktrace file"); - return 1; - } else if (!ret) { + if (ret < 0) + goto err; + else if (!ret) + break; + else if (ret < (int) sizeof(t)) { + log_err("fio: short fifo get\n"); break; - } else if (ret != sizeof(t)) { - log_err("fio: short read on blktrace file\n"); - return 1; } if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) { - log_err("fio: bad magic in blktrace data\n"); - return 1; + log_err("fio: bad magic in blktrace data: %x\n", t.magic); + goto err; } if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) { log_err("fio: bad blktrace version %d\n", t.magic & 0xff); - return 1; + goto err; } - ret = discard_pdu(fd, &t); - if (ret) { + ret = discard_pdu(td, fifo, fd, &t); + if (ret < 0) { td_verror(td, ret, "blktrace lseek"); - return 1; + goto err; + } else if (t.pdu_len != ret) { + log_err("fio: discarded %d of %d\n", ret, t.pdu_len); + goto err; + } + if (t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) + continue; + if (!ttime) { + ttime = t.time; + cpu = t.cpu; } - handle_trace(td, &t); + delay = 0; + if (cpu == t.cpu) + delay = t.time - ttime; + handle_trace(td, &t, delay, ios, rw_bs); + ttime = t.time; + cpu = t.cpu; } while (1); + fifo_free(fifo); close(fd); + + if (!ios[DDIR_READ] && !ios[DDIR_WRITE]) { + log_err("fio: found no ios in blktrace data\n"); + return 1; + } else if (ios[DDIR_READ] && !ios[DDIR_READ]) { + td->o.td_ddir = TD_DDIR_READ; + td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ]; + } else if (!ios[DDIR_READ] && ios[DDIR_WRITE]) { + td->o.td_ddir = TD_DDIR_WRITE; + td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE]; + } else { + td->o.td_ddir = TD_DDIR_RW; + td->o.max_bs[DDIR_READ] = rw_bs[DDIR_READ]; + td->o.max_bs[DDIR_WRITE] = rw_bs[DDIR_WRITE]; + } + + /* + * We need to do direct/raw ios to the device, to avoid getting + * read-ahead in our way. + */ + td->o.odirect = 1; + return 0; +err: + close(fd); + fifo_free(fifo); + return 1; }