CC = gcc
CFLAGS = -Wall -O2 -W -g
-#CFLAGS = -Wall -g -W -UDO_INLINE -DDEBUG
+# CFLAGS = -Wall -g -W -UDO_INLINE -DDEBUG
ALL_CFLAGS = $(CFLAGS) -I.. -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
PROGS = btt
#ELIBS = -lefence
LIBS = $(PLIBS) $(ELIBS)
OBJS = args.o bt_timeline.o devmap.o devs.o dip_rb.o iostat.o latency.o \
misc.o output.o proc.o seek.o trace.o trace_complete.o trace_im.o \
- trace_issue.o trace_queue.o trace_remap.o trace_requeue.o rbtree.o
+ trace_issue.o trace_queue.o trace_remap.o trace_requeue.o rbtree.o \
+ mmap.o
all: depend $(PROGS)
exit(1);
}
- ifd = open(input_name, O_RDONLY);
- if (ifd < 0) {
- perror(input_name);
- exit(1);
- }
+ setup_ifile(input_name);
if (output_name == NULL)
ranges_ofp = avgs_ofp = stdout;
perror(iostat_name);
exit(1);
}
+ setbuffer(iostat_ofp, malloc(64 * 1024), 64 * 1024);
}
if (per_io_name != NULL) {
perror(per_io_name);
exit(1);
}
+ setbuffer(per_io_ofp, malloc(64 * 1024), 64 * 1024);
}
}
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
#include "globals.h"
char bt_timeline_version[] = "0.99";
char *devices, *exes, *input_name, *output_name, *seek_name;
char *d2c_name, *q2c_name, *per_io_name;
FILE *ranges_ofp, *avgs_ofp, *per_io_ofp;
-int ifd, verbose, done, time_bounded;
+int verbose, done, time_bounded;
double t_astart, t_aend;
unsigned long n_traces;
struct avgs_info all_avgs;
double range_delta = 0.1;
__u64 last_q = (__u64)-1;
+__u64 next_retry_check = 0;
struct region_info all_regions = {
.qranges = LIST_HEAD_INIT(all_regions.qranges),
.cr_cur = NULL
};
+#if defined(DEBUG)
+int rb_tree_size;
+#endif
+
int process(void);
int main(int argc, char *argv[])
return 0;
}
+/* tv2dbl: convert a struct timeval into a floating-point number of seconds. */
+static inline double tv2dbl(struct timeval *tv)
+{
+ return (double)tv->tv_sec + (((double)tv->tv_usec) / (1000.0 * 1000.0));
+}
+
int process(void)
{
int ret = 0;
- struct blk_io_trace *t;
struct io *iop = io_alloc();
+ struct timeval tvs, tvi, tve;
genesis = last_vtrace = time(NULL);
- while (!done && !do_read(ifd, &iop->t, sizeof(struct blk_io_trace))) {
- t = convert_to_cpu(&iop->t);
- if (t->pdu_len > 0) {
- iop->pdu = malloc(t->pdu_len);
- if (do_read(ifd, iop->pdu, t->pdu_len)) {
- ret = 1;
- break;
- }
- }
+ gettimeofday(&tvs, NULL);
+ while (!done && next_trace(&iop->t, &iop->pdu)) {
add_trace(iop);
iop = io_alloc();
}
io_release(iop);
- do_retries();
+ do_retries(0);
+
+ gettimeofday(&tvi, NULL);
if (iostat_ofp) {
fprintf(iostat_ofp, "\n");
seek_clean();
latency_clean();
+ gettimeofday(&tve, NULL);
if (verbose) {
- double tps = (double)n_traces /
- (double)((time(NULL) + 1) - genesis);
- printf("%10lu traces @ %.1lf Ktps\n", n_traces, tps/1000.0);
+ double tps, dt_input = tv2dbl(&tvi) - tv2dbl(&tvs);
+
+ tps = (double)n_traces / dt_input;
+ printf("%10lu traces @ %.1lf Ktps\t%.6lf+%.6lf=%.6lf\n",
+ n_traces, tps/1000.0,
+ dt_input, tv2dbl(&tve) - tv2dbl(&tvi),
+ tv2dbl(&tve) - tv2dbl(&tvs));
+#if defined(DEBUG)
+ printf("\ttree = |%d|\n", rb_tree_size);
+ if (rb_tree_size > 0)
+ dump_rb_trees();
+#endif
}
return ret;
#define DEV_HASH(dev) ((MAJOR(dev) ^ MINOR(dev)) & (N_DEV_HASH - 1))
struct list_head dev_heads[N_DEV_HASH];
+#if defined(DEBUG)
+/*
+ * __dump_rb_node: pre-order dump to stdout of every io in the subtree
+ * rooted at n. IOP_A entries are shown together with their first
+ * downstream io (__dump_iop2); all other types go through __dump_iop.
+ */
+void __dump_rb_node(struct rb_node *n)
+{
+ struct io *iop = rb_entry(n, struct io, rb_node);
+
+ dbg_ping();
+ if (iop->type == IOP_A)
+ __dump_iop2(stdout, iop, bilink_first_down(iop, NULL));
+ else
+ __dump_iop(stdout, iop, 0);
+ if (n->rb_left)
+ __dump_rb_node(n->rb_left);
+ if (n->rb_right)
+ __dump_rb_node(n->rb_right);
+}
+
+/*
+ * __dump_rb_tree: dump the rb tree holding ios of the given type for
+ * device dip; a "IOP_<c>" header is printed only when the tree is
+ * non-empty.
+ */
+void __dump_rb_tree(struct d_info *dip, enum iop_type type)
+{
+ struct rb_root *roots = dip->heads;
+ struct rb_root *root = &roots[type];
+ struct rb_node *n = root->rb_node;
+
+ if (n) {
+ printf("\tIOP_%c\n", type2c(type));
+ __dump_rb_node(n);
+ }
+}
+
+/*
+ * dump_rb_trees: walk every device hash bucket and dump each device's
+ * per-type rb trees (the IOP_L tree is intentionally skipped).
+ * DEBUG-only diagnostic, invoked when ios remain un-released at exit.
+ */
+void dump_rb_trees(void)
+{
+ int i;
+ enum iop_type type;
+ struct d_info *dip;
+ struct list_head *p;
+
+ for (i = 0; i < N_DEV_HASH; i++) {
+ __list_for_each(p, &dev_heads[i]) {
+ dip = list_entry(p, struct d_info, hash_head);
+ printf("Trees for %3d,%-3d\n", MAJOR(dip->device),
+ MINOR(dip->device));
+ for (type = IOP_Q; type < N_IOP_TYPES; type++) {
+ if (type != IOP_L)
+ __dump_rb_tree(dip, type);
+ }
+ }
+ }
+}
+#endif
+
void init_dev_heads(void)
{
int i;
struct d_info *__dip_find(__u32 device)
{
- struct list_head *p;
struct d_info *dip;
+ struct list_head *p;
__list_for_each(p, &dev_heads[DEV_HASH(device)]) {
dip = list_entry(p, struct d_info, hash_head);
}
iop->linked = dip_rb_ins(dip, iop);
+#if defined(DEBUG)
+ if (iop->linked)
+ rb_tree_size++;
+#endif
return dip;
}
void dip_rem(struct io *iop)
{
- dip_rb_rem(iop);
+ /* Only ios actually inserted into the rb tree (linked) are removed;
+ * clearing linked makes a second dip_rem() call a no-op. */
+ if (iop->linked) {
+ dip_rb_rem(iop);
+ iop->linked = 0;
+ }
}
void dip_foreach(struct io *iop, enum iop_type type,
if ((iop_s <= this_s) && (this_e <= iop_e)) {
if (fnc) fnc(iop, this);
- if (head) list_add_tail(&this->f_head, head);
+ if (head) {
+ ASSERT(this->f_head.next == LIST_POISON1);
+ list_add_tail(&this->f_head, head);
+ }
}
if (iop_s < this_s)
rb_foreach(n->rb_left, iop, fnc, head);
#define ASSERT(truth)
#define DBG_PING()
#define LIST_DEL(hp) do { \
- if (((hp)->next != NULL) && \
- ((hp)->next != LIST_POISON1)) \
- list_del(hp); \
+ ASSERT((hp)->next != NULL); \
+ ASSERT(!list_empty(hp)); \
+ list_del(hp); \
} while (0)
#endif
IOP_Q = 0,
IOP_X = 1,
IOP_A = 2,
- IOP_L = 3, // Betwen-device linkage
+ IOP_I = 3,
IOP_M = 4,
- IOP_I = 5,
- IOP_D = 6,
- IOP_C = 7,
- IOP_R = 8,
+ IOP_D = 5,
+ IOP_C = 6,
+ IOP_R = 7,
+ IOP_L = 8, // IOP_A -> IOP_A + IOP_L
};
-#define N_IOP_TYPES (IOP_R + 1)
+#define N_IOP_TYPES (IOP_L + 1)
struct file_info {
struct file_info *next;
struct io {
struct rb_node rb_node;
- struct list_head f_head;
+ struct list_head f_head, c_pending, retry;
+ struct list_head down_list, up_list;
struct d_info *dip;
struct p_info *pip;
- struct blk_io_trace t;
void *pdu;
+ struct blk_io_trace t;
+ __u64 bytes_left;
+ int linked, on_retry_list, down_len, up_len;
enum iop_type type;
+};
- struct list_head down_head, up_head, c_pending, retry;
- struct list_head down_list, up_list;
- __u64 bytes_left;
- int run_ready, linked, self_remap, displayed, on_retry_list;
+struct bilink {
+ struct list_head down_head, up_head;
+ struct io *diop, *uiop;
};
/* bt_timeline.c */
extern char *seek_name, *iostat_name, *d2c_name, *q2c_name, *per_io_name;
extern double range_delta;
extern FILE *ranges_ofp, *avgs_ofp, *iostat_ofp, *per_io_ofp;;
-extern int verbose, ifd, dump_level, done, time_bounded;
+extern int verbose, done, time_bounded;
extern unsigned int n_devs;
extern unsigned long n_traces;
extern struct list_head all_devs, all_procs, retries;
extern struct avgs_info all_avgs;
-extern __u64 last_q;
+extern __u64 last_q, next_retry_check;
extern struct region_info all_regions;
extern struct list_head free_ios;
extern __u64 iostat_interval, iostat_last_stamp;
extern time_t genesis, last_vtrace;
extern double t_astart, t_aend;
+#if defined(DEBUG)
+extern int rb_tree_size;
+#endif
/* args.c */
void handle_args(int argc, char *argv[]);
struct devmap *dev_map_find(__u32 device);
/* devs.c */
+#if defined(DEBUG)
+void dump_rb_trees(void);
+#endif
void init_dev_heads(void);
struct d_info *dip_add(__u32 device, struct io *iop);
void dip_rem(struct io *iop);
void latency_q2c(struct d_info *dip, __u64 tstamp, __u64 latency);
/* misc.c */
-struct blk_io_trace *convert_to_cpu(struct blk_io_trace *t);
int in_devices(struct blk_io_trace *t);
-unsigned int do_read(int ifd, void *buf, int len);
void add_file(struct file_info **fipp, FILE *fp, char *oname);
void clean_files(struct file_info **fipp);
void dbg_ping(void);
+/* mmap.c */
+void setup_ifile(char *fname);
+void cleanup_ifile(void);
+int next_trace(struct blk_io_trace *t, void **pdu);
+
/* output.c */
int output_avgs(FILE *ofp);
int output_ranges(FILE *ofp);
int seeki_mode(void *handle, struct mode *mp);
/* trace.c */
-void dump_iop(FILE *ofp, struct io *to_iop, struct io *from_iop, int indent);
-void release_iops(struct list_head *del_head);
+void __dump_iop(FILE *ofp, struct io *iop, int extra_nl);
+void __dump_iop2(FILE *ofp, struct io *a_iop, struct io *l_iop);
+void release_iops(struct list_head *rmhd);
void add_trace(struct io *iop);
-void do_retries(void);
+void do_retries(__u64 now);
/* trace_complete.c */
void trace_complete(struct io *c_iop);
-int retry_complete(struct io *c_iop);
-int ready_complete(struct io *c_iop, struct io *top);
-void run_complete(struct io *c_iop);
+void retry_complete(struct io *c_iop, __u64 now);
/* trace_im.c */
+void run_im(struct io *im_iop, struct io *c_iop, void *param);
+void run_unim(struct io *im_iop, struct list_head *rmhd);
+int ready_im(struct io *im_iop, struct io *c_iop);
void trace_insert(struct io *i_iop);
void trace_merge(struct io *m_iop);
-int ready_im(struct io *im_iop, struct io *top);
-void run_im(struct io *im_iop, struct io *top, struct list_head *del_head);
-void run_unim(struct io *im_iop, struct list_head *del_head);
/* trace_issue.c */
+void run_issue(struct io *d_iop, struct io *c_iop, void *param);
+void run_unissue(struct io *d_iop, struct list_head *rmhd);
+int ready_issue(struct io *d_iop, struct io *c_iop);
void trace_issue(struct io *d_iop);
-int ready_issue(struct io *d_iop, struct io *top);
-void run_issue(struct io *d_iop, struct io *top, struct list_head *del_head);
-void run_unissue(struct io *d_iop, struct list_head *del_head);
/* trace_queue.c */
+void run_queue(struct io *q_iop, struct io *c_iop, struct list_head *rmhd);
+int ready_queue(struct io *q_iop, struct io *c_iop);
void trace_queue(struct io *q_iop);
-int ready_queue(struct io *q_iop, struct io *top);
-void run_queue(struct io *q_iop, struct io *top, struct list_head *del_head);
-void run_unqueue(struct io *q_iop, struct list_head *del_head);
/* trace_remap.c */
+void run_remap(struct io *a_iop, struct io *c_iop, void *param);
+int ready_remap(struct io *a_iop, struct io *c_iop);
void trace_remap(struct io *a_iop);
-int ready_remap(struct io *a_iop, struct io *top);
-void run_remap(struct io *a_iop, struct io *top, struct list_head *del_head);
-void run_unremap(struct io *a_iop, struct list_head *del_head);
/* trace_requeue.c */
void trace_requeue(struct io *r_iop);
-int retry_requeue(struct io *r_iop);
-int ready_requeue(struct io *r_iop, struct io *top);
-void run_requeue(struct io *r_iop);
#include "inlines.h"
}
}
+/*
+ * avg_update_n: fold a time sample t that represents n events into the
+ * running statistics *ap: min/max are seeded from the first sample,
+ * total grows by n*t, and the event count grows by n.
+ */
+static inline void avg_update_n(struct avg_info *ap, __u64 t, int n)
+{
+ if (ap->n == 0) {
+ ap->min = ap->max = t;
+ ap->total = (n * t);
+ }
+ else {
+ if (t < ap->min)
+ ap->min = t;
+ else if (t > ap->max)
+ ap->max = t;
+ ap->total += (n * t);
+ }
+
+ ap->n += n;
+}
+
static inline void avg_unupdate(struct avg_info *ap, __u64 t)
{
ap->n--;
iop = malloc(sizeof(struct io));
memset(iop, 0, sizeof(struct io));
+ INIT_LIST_HEAD(&iop->down_list);
+ INIT_LIST_HEAD(&iop->up_list);
+
+#if defined(DEBUG)
+ iop->f_head.next = LIST_POISON1;
+ iop->c_pending.next = LIST_POISON1;
+ iop->retry.next = LIST_POISON1;
+#endif
return iop;
}
iop->dip = dip_add(iop->t.device, iop);
if (iop->linked) {
iop->pip = find_process(iop->t.pid, NULL);
- INIT_LIST_HEAD(&iop->down_list);
- INIT_LIST_HEAD(&iop->up_list);
iop->bytes_left = iop->t.bytes;
}
static inline void io_release(struct io *iop)
{
+ /* The list-poison / emptiness ASSERTs (no-ops when ASSERT is the
+ * empty stub) verify iop is off every list before it is freed. */
+ ASSERT(iop->f_head.next == LIST_POISON1);
+ ASSERT(iop->c_pending.next == LIST_POISON1);
+ ASSERT(iop->retry.next == LIST_POISON1);
+ ASSERT(list_empty(&iop->up_list));
+ ASSERT(list_empty(&iop->down_list));
+
if (iop->linked)
dip_rem(iop);
if (iop->pdu)
free(iop->pdu);
+
io_free(iop);
}
if (_pip) avg_update(&_pip->avgs. _avg , _time); \
} while (0)
+/*
+ * UPDATE_AVGS_N: fold a sample representing _n events into the global,
+ * per-device, and (when the process is known) per-process averages.
+ */
+#define UPDATE_AVGS_N(_avg, _iop, _pip, _time, _n) do { \
+ avg_update_n(&all_avgs. _avg , _time, _n); \
+ avg_update_n(&_iop->dip->avgs. _avg , _time, _n); \
+ if (_pip) avg_update_n(&_pip->avgs. _avg , _time,_n); \
+ } while (0)
+
#define UNUPDATE_AVGS(_avg, _iop, _pip, _time) do { \
avg_unupdate(&all_avgs. _avg , _time); \
avg_unupdate(&_iop->dip->avgs. _avg , _time); \
static inline void update_q2c(struct io *iop, __u64 c_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2c %13.9f\n", BIT_TIME(c_time));
+#endif
UPDATE_AVGS(q2c, iop, iop->pip, c_time);
}
static inline void update_q2a(struct io *iop, __u64 a_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2a %13.9f\n", BIT_TIME(a_time));
+#endif
UPDATE_AVGS(q2a, iop, iop->pip, a_time);
}
static inline void update_q2i(struct io *iop, __u64 i_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2i %13.9f\n", BIT_TIME(i_time));
+#endif
UPDATE_AVGS(q2i, iop, iop->pip, i_time);
}
+static inline void unupdate_q2i(struct io *iop, __u64 i_time)
+{
+ UNUPDATE_AVGS(q2i, iop, iop->pip, i_time);
+}
+
static inline void update_i2d(struct io *iop, __u64 d_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "i2d %13.9f\n", BIT_TIME(d_time));
+#endif
UPDATE_AVGS(i2d, iop, iop->pip, d_time);
}
UNUPDATE_AVGS(i2d, iop, iop->pip, d_time);
}
-static inline void update_d2c(struct io *iop, __u64 c_time)
+static inline void update_d2c(struct io *iop, int n, __u64 c_time)
{
- UPDATE_AVGS(d2c, iop, iop->pip, c_time);
+ if (per_io_ofp) fprintf(per_io_ofp, "d2c %13.9f\n", n*BIT_TIME(c_time));
+ UPDATE_AVGS_N(d2c, iop, iop->pip, c_time, n);
}
static inline void update_blks(struct io *iop)
static inline void dip_rb_rem(struct io *iop)
{
rb_erase(&iop->rb_node, __get_root(iop->dip, iop->type));
+#if defined(DEBUG)
+ rb_tree_size--;
+#endif
}
static inline void dip_rb_fe(struct d_info *dip, enum iop_type type,
return rb_find_sec(__get_root(dip, type), sec);
}
-static inline struct io *list_first_down(struct io *iop)
+static inline void bump_retry(__u64 now)
{
- struct list_head *p = list_first(&iop->down_list);
- return p ? list_entry(p, struct io, up_head) : NULL;
-}
-
-static inline struct io *list_first_up(struct io *iop)
-{
- struct list_head *p = list_first(&iop->up_list);
- return p ? list_entry(p, struct io, down_head) : NULL;
-}
-
-static inline int list_empty_up(struct io *iop)
-{
- return list_empty(&iop->up_list);
-}
-
-static inline void __link(struct io *down_iop, struct io *up_iop)
-{
- list_add_tail(&down_iop->up_head, &up_iop->down_list);
- list_add_tail(&up_iop->down_head, &down_iop->up_list);
-}
-
-static inline void __unlink(struct io *down_iop, struct io *up_iop)
-{
- LIST_DEL(&down_iop->up_head);
- LIST_DEL(&up_iop->down_head);
+ if (!list_empty(&retries))
+ next_retry_check = now + (100 * 1000); // 100 usec
+ else
+ next_retry_check = 0;
}
static inline void add_retry(struct io *iop)
{
+ bump_retry(iop->t.time);
if (!iop->on_retry_list) {
list_add_tail(&iop->retry, &retries);
iop->on_retry_list = 1;
LIST_DEL(&iop->retry);
iop->on_retry_list = 0;
}
+ bump_retry(iop->t.time);
}
static inline __u64 tdelta(struct io *iop1, struct io *iop2)
__u64 t2 = iop2->t.time;
return (t1 < t2) ? (t2 - t1) : 1;
}
+
+/* remapper_dev: true for devices that remap I/O downward -- MD (major 9)
+ * and device-mapper (see the major-number question below). */
+static inline int remapper_dev(__u32 dev)
+{
+ int mjr = MAJOR(dev);
+ return mjr == 9 || mjr == 254; // 253? That's what is in blkparse.c
+ // But I see 254...
+}
+
+/* dump_iop: emit iop to the per-io output file, if one was requested. */
+static inline void dump_iop(struct io *iop, int extra_nl)
+{
+ if (per_io_ofp)
+ __dump_iop(per_io_ofp, iop, extra_nl);
+}
+
+/* dump_iop2: emit a linked pair (a_iop <- l_iop) to the per-io file. */
+static inline void dump_iop2(struct io *a_iop, struct io *l_iop)
+{
+ if (per_io_ofp)
+ __dump_iop2(per_io_ofp, a_iop, l_iop);
+}
+
+/* type2c: map an iop_type to its single-character action code ('?' for
+ * anything out of range). */
+static inline int type2c(enum iop_type type)
+{
+ int c;
+
+ switch (type) {
+ case IOP_Q: c = 'Q'; break;
+ case IOP_X: c = 'X'; break;
+ case IOP_A: c = 'A'; break;
+ case IOP_I: c = 'I'; break;
+ case IOP_M: c = 'M'; break;
+ case IOP_D: c = 'D'; break;
+ case IOP_C: c = 'C'; break;
+ case IOP_R: c = 'R'; break;
+ case IOP_L: c = 'L'; break;
+ default : c = '?'; break;
+ }
+
+ return c;
+}
+
+/* bilink_free: release a bilink obtained from bilink_alloc(). */
+static inline void bilink_free(struct bilink *blp)
+{
+ free(blp);
+}
+
+/*
+ * bilink_alloc: allocate a bilink joining diop (down side) to uiop (up
+ * side). NOTE(review): the malloc result is unchecked -- OOM faults on
+ * the stores below; confirm this matches the allocation policy used
+ * elsewhere in the tool.
+ */
+static inline struct bilink *bilink_alloc(struct io *diop, struct io *uiop)
+{
+ struct bilink *blp = malloc(sizeof(*blp));
+
+ blp->diop = diop;
+ blp->uiop = uiop;
+
+ return blp;
+}
+
+/*
+ * bilink: bidirectionally link diop (downstream) to uiop (upstream).
+ * The new bilink is appended to diop's up_list (via down_head) and to
+ * uiop's down_list (via up_head), and both link counts are bumped.
+ */
+static inline void bilink(struct io *diop, struct io *uiop)
+{
+ struct bilink *blp = bilink_alloc(diop, uiop);
+
+ list_add_tail(&blp->down_head, &diop->up_list);
+ list_add_tail(&blp->up_head, &uiop->down_list);
+
+ diop->up_len++;
+ uiop->down_len++;
+}
+
+/* biunlink: undo bilink() -- detach the bilink from both sides' lists,
+ * decrement the link counts, and free the bilink itself. */
+static inline void biunlink(struct bilink *blp)
+{
+ LIST_DEL(&blp->down_head);
+ LIST_DEL(&blp->up_head);
+ blp->diop->up_len--;
+ blp->uiop->down_len--;
+ bilink_free(blp);
+}
+
+/*
+ * bilink_first_down: return the downstream io of the first bilink on
+ * iop's down_list (NULL if the list is empty); the bilink itself is
+ * handed back through *blp_p when blp_p is non-NULL.
+ */
+static inline struct io *bilink_first_down(struct io *iop,
+ struct bilink **blp_p)
+{
+ struct bilink *blp;
+
+ if (list_empty(&iop->down_list))
+ return NULL;
+ blp = list_entry(iop->down_list.next, struct bilink, up_head);
+
+ if (blp_p != NULL)
+ *blp_p = blp;
+ return blp->diop;
+}
+
+/*
+ * bilink_first_up: return the upstream io of the first bilink on iop's
+ * up_list (NULL if the list is empty); the bilink itself is handed back
+ * through *blp_p when blp_p is non-NULL.
+ *
+ * Fix: entries on up_list are bilinks chained via down_head, i.e. ones
+ * where blp->diop == iop (see bilink()); the upstream partner to return
+ * is therefore blp->uiop. Returning blp->diop handed the caller its own
+ * argument back -- ready_complete_remapper() would then "remap" the
+ * IOP_L io onto itself instead of its IOP_A parent.
+ */
+static inline struct io *bilink_first_up(struct io *iop, struct bilink **blp_p)
+{
+ struct bilink *blp;
+
+ if (list_empty(&iop->up_list))
+ return NULL;
+ blp = list_entry(iop->up_list.next, struct bilink, down_head);
+
+ if (blp_p != NULL)
+ *blp_p = blp;
+ return blp->uiop;
+}
+
+/*
+ * bilink_for_each_down: invoke func(diop, uiop, param) for every bilink
+ * hanging off uiop's down_list. Iteration is deletion-safe; when ul is
+ * non-zero each bilink is unlinked and freed after its callback runs.
+ */
+typedef void (*bilink_func)(struct io *diop, struct io *uiop, void *param);
+static inline void bilink_for_each_down(bilink_func func, struct io *uiop,
+ void *param, int ul)
+{
+ struct bilink *blp;
+ struct list_head *p, *q;
+
+ list_for_each_safe(p, q, &uiop->down_list) {
+ blp = list_entry(p, struct bilink, up_head);
+ func(blp->diop, uiop, param);
+ if (ul)
+ biunlink(blp);
+ }
+}
#define INLINE_DECLARE
#include "globals.h"
-int data_is_native = -1;
-
-struct blk_io_trace *convert_to_cpu(struct blk_io_trace *t)
-{
- if (data_is_native == -1)
- check_data_endianness(t->magic);
-
- trace_to_cpu(t);
-
- ASSERT(CHECK_MAGIC(t));
- ASSERT((t->magic & 0xff) == SUPPORTED_VERSION);
-
- return t;
-}
-
int in_devices(struct blk_io_trace *t)
{
int i;
return 0;
}
-unsigned int do_read(int ifd, void *buf, int len)
-{
- int n;
-
- n = read(ifd, buf, len);
- if (n < 0) {
- perror(input_name);
- return 1;
- }
- else if (0 < n && n < len) {
- fprintf(stderr,"Short read on %s\n", input_name);
- return 1;
- }
- else if (n == 0) /* EOF */
- return 1;
-
- return 0;
-}
-
void add_file(struct file_info **fipp, FILE *fp, char *oname)
{
struct file_info *fip;
--- /dev/null
+/*
+ * blktrace output analysis: generate a timeline & gather statistics
+ *
+ * Copyright (C) 2006 Alan D. Brunelle <Alan.Brunelle@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "blktrace.h"
+
+#define DEF_LEN (16 * 1024 * 1024)
+
+static int fd;
+static void *cur_map = MAP_FAILED;
+static off_t cur_min, cur, cur_max, total_size;
+static size_t len;
+static struct blk_io_trace *next_t;
+static long pgsz;
+
+int data_is_native = -1;
+
+/* min_len: return the smaller of two sizes. */
+static inline size_t min_len(size_t a, size_t b) { return a < b ? a : b; }
+
+/*
+ * convert_to_cpu: copy the raw on-disk trace at t into *tp, byte-swapping
+ * from big-endian when the data is not native (endianness is probed once,
+ * from the first record's magic). The record's magic and version are
+ * checked, then any trailing pdu payload (stored immediately after the
+ * record, at t+1) is duplicated into a freshly malloc'd *pdu (*pdu is
+ * NULL when pdu_len == 0; caller owns and frees the copy).
+ * Returns the number of input bytes consumed: record + pdu.
+ * NOTE(review): the pdu malloc is unchecked -- confirm OOM policy.
+ */
+static inline size_t convert_to_cpu(struct blk_io_trace *t,
+ struct blk_io_trace *tp,
+ void **pdu)
+{
+ if (data_is_native == -1)
+ check_data_endianness(t->magic);
+
+ if (data_is_native)
+ memcpy(tp, t, sizeof(*tp));
+ else {
+ tp->magic = be32_to_cpu(t->magic);
+ tp->sequence = be32_to_cpu(t->sequence);
+ tp->time = be64_to_cpu(t->time);
+ tp->sector = be64_to_cpu(t->sector);
+ tp->bytes = be32_to_cpu(t->bytes);
+ tp->action = be32_to_cpu(t->action);
+ tp->pid = be32_to_cpu(t->pid);
+ tp->device = be32_to_cpu(t->device);
+ tp->cpu = be16_to_cpu(t->cpu);
+ tp->error = be16_to_cpu(t->error);
+ tp->pdu_len = be16_to_cpu(t->pdu_len);
+ }
+
+ assert(CHECK_MAGIC(tp));
+ assert((tp->magic & 0xff) == SUPPORTED_VERSION);
+
+ if (tp->pdu_len) {
+ *pdu = malloc(tp->pdu_len);
+ memcpy(*pdu, t+1, tp->pdu_len);
+ }
+ else
+ *pdu = NULL;
+
+ return sizeof(*tp) + tp->pdu_len;
+}
+
+/*
+ * move_map: slide the read-only mmap window so the current file offset
+ * (cur) is covered: drop any previous mapping, align down to a page
+ * boundary, and map up to DEF_LEN bytes of the remaining file.
+ * Returns 0 at end of input (less than one trace record remains),
+ * exits on mmap failure, otherwise returns non-zero while cur lies
+ * inside the new window [cur_min, cur_max).
+ */
+static int move_map(void)
+{
+ if (cur_map != MAP_FAILED)
+ munmap(cur_map, len);
+
+ cur_min = (cur & ~(pgsz-1));
+ len = min_len(DEF_LEN, total_size - cur_min);
+ if (len < sizeof(*next_t))
+ return 0;
+
+ cur_map = mmap(NULL, len, PROT_READ, MAP_SHARED, fd,
+ cur_min);
+ if (cur_map == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ cur_max = cur_min + len;
+ return (cur < cur_max);
+}
+
+/*
+ * setup_ifile: open the binary trace file fname, record its total size,
+ * and establish the first mmap window. Exits non-zero on open/fstat
+ * errors, and exits 0 immediately when the file holds no complete
+ * trace record.
+ */
+void setup_ifile(char *fname)
+{
+ struct stat buf;
+
+ pgsz = sysconf(_SC_PAGESIZE);
+
+ fd = open(fname, O_RDONLY);
+ if (fd < 0) {
+ perror(fname);
+ exit(1);
+ }
+ if (fstat(fd, &buf) < 0) {
+ perror(fname);
+ exit(1);
+ }
+ total_size = buf.st_size;
+
+ if (!move_map())
+ exit(0);
+}
+
+/* cleanup_ifile: drop the current mapping (if any) and close the file. */
+void cleanup_ifile(void)
+{
+ if (cur_map != MAP_FAILED)
+ munmap(cur_map, len);
+ close(fd);
+}
+
+/*
+ * next_trace: decode the trace record at the current offset into *t
+ * (payload into *pdu) and advance past it. The mmap window is slid
+ * forward whenever fewer than 512 bytes of it remain. Returns 0 at end
+ * of input, 1 otherwise.
+ * NOTE(review): this assumes a record plus its pdu never extends more
+ * than 512 bytes past the check point -- pdu_len is 16 bits, so a large
+ * pdu near the window edge could read beyond the mapping; confirm the
+ * producer bounds pdu sizes.
+ */
+int next_trace(struct blk_io_trace *t, void **pdu)
+{
+ size_t this_len;
+
+ if ((cur + 512) > cur_max)
+ if (!move_map())
+ return 0;
+
+ next_t = cur_map + (cur - cur_min);
+ this_len = convert_to_cpu(next_t, t, pdu);
+ cur += this_len;
+
+ return 1;
+}
int dump_level;
LIST_HEAD(retries);
-static inline void dump_dev(FILE *ofp, __u32 dev)
+void __dump_iop(FILE *ofp, struct io *iop, int extra_nl)
{
- fprintf(ofp, "%3d,%-3d ", MAJOR(dev), MINOR(dev));
+ fprintf(ofp, "%5d.%09lu %3d,%-3d %c %10llu+%-4u\n",
+ (int)SECONDS(iop->t.time),
+ (unsigned long)NANO_SECONDS(iop->t.time),
+ MAJOR(iop->t.device), MINOR(iop->t.device), type2c(iop->type),
+ (unsigned long long)iop->t.sector, t_sec(&iop->t));
+ if (extra_nl) fprintf(ofp, "\n");
}
-static inline void dump_desc(FILE *ofp, struct io *iop)
+void __dump_iop2(FILE *ofp, struct io *a_iop, struct io *l_iop)
{
- fprintf(ofp, "%10llu+%-4u ", (unsigned long long)iop->t.sector,
- t_sec(&iop->t));
+ fprintf(ofp, "%5d.%09lu %3d,%-3d %c %10llu+%-4u <- (%3d,%-3d) %10llu\n",
+ (int)SECONDS(a_iop->t.time),
+ (unsigned long)NANO_SECONDS(a_iop->t.time),
+ MAJOR(a_iop->t.device), MINOR(a_iop->t.device),
+ type2c(a_iop->type), (unsigned long long)a_iop->t.sector,
+ t_sec(&a_iop->t), MAJOR(l_iop->t.device),
+ MINOR(l_iop->t.device), (unsigned long long)l_iop->t.sector);
}
-void dump_iop(FILE *ofp, struct io *to_iop, struct io *from_iop, int indent)
-{
- int i, c;
-
- if (!ofp) return;
- if (to_iop->displayed) return;
-
- fprintf(ofp, "%5d.%09lu ", (int)SECONDS(to_iop->t.time),
- (unsigned long)NANO_SECONDS(to_iop->t.time));
-
- for (i = 0; i < ((dump_level * 4) + indent); i++)
- fprintf(ofp, " ");
-
- dump_dev(ofp, to_iop->t.device);
-
- switch (to_iop->type) {
- case IOP_Q: c = 'Q'; break;
- case IOP_L: c = 'L'; break;
- case IOP_A: c = 'A'; break;
- case IOP_I: c = 'I'; break;
- case IOP_M: c = 'M'; break;
- case IOP_D: c = 'D'; break;
- case IOP_C: c = 'C'; break;
- default : c = '?'; break;
- }
-
- fprintf(ofp, "%c ", c);
- dump_desc(ofp, to_iop);
- if (from_iop) {
- fprintf(ofp, "<- ");
- dump_dev(ofp, from_iop->t.device);
- dump_desc(ofp, from_iop);
- }
-
- fprintf(ofp, "\n");
-
- to_iop->displayed = 1;
-}
-
+/*
+ * release_iops: pop every io chained (via its f_head member) on the
+ * rmhd removal list and hand each one to io_release().
+ */
-void release_iops(struct list_head *del_head)
+void release_iops(struct list_head *rmhd)
{
struct io *x_iop;
struct list_head *p, *q;
- list_for_each_safe(p, q, del_head) {
+ list_for_each_safe(p, q, rmhd) {
x_iop = list_entry(p, struct io, f_head);
LIST_DEL(&x_iop->f_head);
io_release(x_iop);
}
}
+/*
+ * do_retries: re-attempt completion processing for every io parked on
+ * the retry list (all of which must be IOP_C now); now is the current
+ * trace time (0 once input is drained) and is passed through to
+ * retry_complete().
+ */
-void do_retries(void)
+void do_retries(__u64 now)
{
struct io *iop;
struct list_head *p, *q;
list_for_each_safe(p, q, &retries) {
iop = list_entry(p, struct io, retry);
+ ASSERT(iop->type == IOP_C);
+
// iop could be gone after call...
- if (iop->type == IOP_C)
- retry_complete(iop);
- else
- retry_requeue(iop);
+ retry_complete(iop, now);
}
}
+/* retry_check_time: non-zero when a retry sweep is due at trace time t
+ * (next_retry_check == 0 means no sweep is scheduled). */
+static inline int retry_check_time(__u64 t)
+{
+ return next_retry_check && (t > next_retry_check);
+}
+
static void __add_trace(struct io *iop)
{
time_t now = time(NULL);
+ __u64 tstamp = iop->t.time;
+ int run_retry = retry_check_time(iop->t.time);
n_traces++;
iostat_check_time(iop->t.time);
if (verbose && ((now - last_vtrace) > 0)) {
+#if defined(DEBUG)
+ printf("%10lu t\tretries=|%10d|\ttree size=|%10d|\r",
+ n_traces, list_len(&retries), rb_tree_size);
+#else
printf("%10lu t\r", n_traces);
+#endif
if ((n_traces % 1000000) == 0) printf("\n");
fflush(stdout);
last_vtrace = now;
return;
}
- if (((iop->t.action & 0xffff) != __BLK_TA_REQUEUE) &&
- !list_empty(&retries))
- do_retries();
+ if (run_retry && !list_empty(&retries)) {
+ do_retries(tstamp);
+ bump_retry(tstamp);
+ }
}
void add_trace(struct io *iop)
{
if (iop->t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
- char *slash = strchr(iop->pdu, '/');
-
- if (slash)
- *slash = '\0';
-
- add_process(iop->t.pid, iop->pdu);
+ if (iop->t.pid == 0)
+ add_process(0, "kernel");
+ else {
+ char *slash = strchr(iop->pdu, '/');
+ if (slash)
+ *slash = '\0';
+
+ add_process(iop->t.pid, iop->pdu);
+ }
io_release(iop);
}
else if (iop->t.action & BLK_TC_ACT(BLK_TC_PC))
LIST_HEAD(pending_cs);
-static void gen_c_list(struct io *c_iop, struct list_head *c_head)
+static inline void __run_complete(struct io *c_iop)
{
- struct io *iop;
- struct list_head *p;
-
- __list_for_each(p, &pending_cs) {
- iop = list_entry(p, struct io, c_pending);
- if (iop->t.device == c_iop->t.device)
- continue;
- if (dip_find_sec(iop->dip, IOP_D, BIT_START(iop)) == NULL)
- continue;
-
- __link(iop, c_iop);
- if (ready_complete(iop, c_iop))
- list_add_tail(&iop->f_head, c_head);
- __unlink(iop, c_iop);
- }
-}
-
-static void run_comp(struct io *c_iop, struct io *top, struct list_head *rmhd)
-{
- struct io *d_iop = dip_find_sec(c_iop->dip, IOP_D, BIT_START(c_iop));
-
- update_blks(c_iop);
- if (d_iop) {
- __u64 d2c = tdelta(d_iop, c_iop);
+ LIST_HEAD(rmhd);
- update_d2c(d_iop, d2c);
- latency_d2c(d_iop->dip, c_iop->t.time, d2c);
- iostat_complete(d_iop, c_iop);
+ if (remapper_dev(c_iop->t.device))
+ bilink_for_each_down(run_remap, c_iop, &rmhd, 1);
+ else
+ bilink_for_each_down(run_issue, c_iop, &rmhd, 1);
- __link(d_iop, c_iop);
- run_issue(d_iop, top, rmhd);
- __unlink(d_iop, c_iop);
- }
- else {
- LIST_HEAD(head);
- struct io *iop;
- struct list_head *p, *q;
-
- gen_c_list(c_iop, &head);
- list_for_each_safe(p, q, &head) {
- iop = list_entry(p, struct io, f_head);
- LIST_DEL(&iop->f_head);
-
- dump_level++;
- __link(iop, c_iop);
- run_comp(iop, top, rmhd);
- __unlink(iop, c_iop);
- dump_level--;
- }
- }
-
- dump_iop(per_io_ofp, c_iop, NULL, 0);
+ dump_iop(c_iop, 1);
LIST_DEL(&c_iop->c_pending);
del_retry(c_iop);
- list_add_tail(&c_iop->f_head, rmhd);
+
+ list_add_tail(&c_iop->f_head, &rmhd);
+ release_iops(&rmhd);
}
-static int ready_comp(struct io *c_iop,
- __attribute__((__unused__)) struct io *top)
+static int ready_complete_remapper(struct io *c_iop)
{
LIST_HEAD(head);
- struct io *iop;
struct list_head *p, *q;
- __u64 bl = c_iop->bytes_left;
+ struct io *l_iop, *a_iop;
- gen_c_list(c_iop, &head);
+ dip_foreach_list(c_iop, IOP_L, &head);
list_for_each_safe(p, q, &head) {
- iop = list_entry(p, struct io, f_head);
- LIST_DEL(&iop->f_head);
-
- __link(iop, c_iop);
- if (ready_complete(iop, c_iop))
- bl -= iop->bytes_left;
- __unlink(iop, c_iop);
+ l_iop = list_entry(p, struct io, f_head);
+ LIST_DEL(&l_iop->f_head);
+
+ ASSERT(!list_empty(&l_iop->up_list));
+ a_iop = bilink_first_up(l_iop, NULL);
+ if (ready_remap(a_iop, c_iop)) {
+ dip_rem(l_iop);
+ bilink(a_iop, c_iop);
+ c_iop->bytes_left -= a_iop->t.bytes;
+ }
}
- return bl == 0;
+ return c_iop->bytes_left == 0;
}
-void trace_complete(struct io *c_iop)
+int ready_complete(struct io *c_iop)
{
- if (!io_setup(c_iop, IOP_C)) {
- io_release(c_iop);
- return;
+ __u64 d2c;
+ struct io *d_iop;
+
+ if (c_iop->bytes_left == 0)
+ return 1;
+
+ if (remapper_dev(c_iop->t.device))
+ return ready_complete_remapper(c_iop);
+
+ if (!list_empty(&c_iop->down_list))
+ return 1;
+
+ d_iop = dip_find_sec(c_iop->dip, IOP_D, BIT_START(c_iop));
+ if (!d_iop)
+ return -1;
+
+ if (c_iop->t.bytes != d_iop->t.bytes) {
+ fprintf(stderr,
+ "\nFATAL: Probable time anomaly detected\n");
+ fprintf(stderr,
+ "D @ %15.9lf missing C, later C @ %15.9lf\n",
+ BIT_TIME(d_iop->t.time),
+ BIT_TIME(c_iop->t.time));
+ exit(1);
}
- list_add_tail(&c_iop->c_pending, &pending_cs);
- if (ready_complete(c_iop, c_iop)) {
- dump_level = 0;
- run_complete(c_iop);
- }
- else
- add_retry(c_iop);
-}
-
-int retry_complete(struct io *c_iop)
-{
- if (!ready_complete(c_iop, c_iop))
+ if (!ready_issue(d_iop, c_iop))
return 0;
- run_complete(c_iop);
+ c_iop->bytes_left = 0;
+
+ d2c = tdelta(d_iop, c_iop);
+ update_d2c(d_iop, d_iop->down_len, d2c);
+ latency_d2c(d_iop->dip, c_iop->t.time, d2c);
+ iostat_complete(d_iop, c_iop);
+
+ bilink(d_iop, c_iop);
+ dip_rem(d_iop);
return 1;
}
-int ready_complete(struct io *c_iop, struct io *top)
+void trace_complete(struct io *c_iop)
{
- struct io *d_iop = dip_find_sec(c_iop->dip, IOP_D, BIT_START(c_iop));
-
- if (d_iop) {
- ASSERT(d_iop->t.bytes == c_iop->bytes_left);
- return ready_issue(d_iop, top);
+ if (io_setup(c_iop, IOP_C)) {
+ update_blks(c_iop);
+ update_cregion(&all_regions, c_iop->t.time);
+ update_cregion(&c_iop->dip->regions, c_iop->t.time);
+ if (c_iop->pip)
+ update_cregion(&c_iop->pip->regions, c_iop->t.time);
+
+ list_add_tail(&c_iop->c_pending, &pending_cs);
+ switch (ready_complete(c_iop)) {
+ case 1:
+ __run_complete(c_iop);
+ break;
+ case 0:
+ add_retry(c_iop);
+ break;
+ case -1:
+ LIST_DEL(&c_iop->c_pending);
+ del_retry(c_iop);
+ io_release(c_iop);
+ break;
+ }
}
else
- return ready_comp(c_iop, top);
+ io_release(c_iop);
}
-void run_complete(struct io *c_iop)
+void retry_complete(struct io *c_iop, __u64 now)
{
- LIST_HEAD(rmhd);
-
- update_cregion(&all_regions, c_iop->t.time);
- update_cregion(&c_iop->dip->regions, c_iop->t.time);
- if (c_iop->pip)
- update_cregion(&c_iop->pip->regions, c_iop->t.time);
-
- run_comp(c_iop, c_iop, &rmhd);
- if (per_io_ofp) fprintf(per_io_ofp, "\n");
- release_iops(&rmhd);
+ double tc = BIT_TIME(c_iop->t.time);
+
+ switch (ready_complete(c_iop)) {
+ case 1:
+#if defined(DEBUG)
+ fprintf(stderr, "Retried %15.9lf success!\n", tc);
+#endif
+ __run_complete(c_iop);
+ break;
+ case 0:
+ if (now == 0 || ((BIT_TIME(now) - tc) < 1.0))
+ break;
+ if (!list_empty(&c_iop->down_list))
+ break;
+ /*FALLTHROUGH*/
+ case -1:
+ LIST_DEL(&c_iop->c_pending);
+ del_retry(c_iop);
+ io_release(c_iop);
+ break;
+ }
}
*/
#include "globals.h"
-void trace_insert(struct io *i_iop)
+struct params {
+ struct io *c_iop;
+ struct list_head *rmhd;
+};
+
+void __run_im(struct io *q_iop, struct io *im_iop, void *param)
{
- if (!io_setup(i_iop, IOP_I)) {
- io_release(i_iop);
- return;
- }
- iostat_insert(i_iop);
+ struct params *p = param;
+ run_queue(q_iop, p->c_iop, p->rmhd);
+ dump_iop(im_iop, 0);
+ list_add_tail(&im_iop->f_head, p->rmhd);
}
-void trace_merge(struct io *m_iop)
+void __run_unim(struct io *q_iop, struct io *im_iop, void *param)
{
- if (!io_setup(m_iop, IOP_M)) {
- io_release(m_iop);
- return;
+ struct params *p = param;
+ if (q_iop->bytes_left == 0) {
+ q_iop->linked = dip_rb_ins(q_iop->dip, q_iop);
+ ASSERT(q_iop->linked);
+#if defined(DEBUG)
+ rb_tree_size++;
+#endif
}
- iostat_merge(m_iop);
+
+ q_iop->bytes_left += im_iop->t.bytes;
+ unupdate_q2i(q_iop, tdelta(q_iop, im_iop));
+ list_add_tail(&im_iop->f_head, p->rmhd);
}
-int ready_im(struct io *im_iop, struct io *top)
+void run_im(struct io *im_iop, struct io *c_iop, void *param)
{
- struct io *q_iop = dip_find_sec(im_iop->dip, IOP_Q, BIT_START(im_iop));
-
- if (q_iop) {
- ASSERT(q_iop->bytes_left >= im_iop->bytes_left);
- return ready_queue(q_iop, top);
- }
+ struct params p = {
+ .c_iop = c_iop,
+ .rmhd = (struct list_head *)param
+ };
+ bilink_for_each_down(__run_im, im_iop, &p, 1);
+}
- return 0;
+void run_unim(struct io *im_iop, struct list_head *rmhd)
+{
+ struct params p = {
+ .c_iop = NULL,
+ .rmhd = rmhd
+ };
+ bilink_for_each_down(__run_unim, im_iop, &p, 1);
}
-void run_im(struct io *im_iop, struct io *top, struct list_head *del_head)
+int ready_im(struct io *im_iop, struct io *c_iop)
{
- struct io *q_iop = dip_find_sec(im_iop->dip, IOP_Q, BIT_START(im_iop));
+ if (im_iop->bytes_left > 0) {
+ __u64 xfer;
+ LIST_HEAD(head);
+ struct io *q_iop;
+ struct list_head *p, *q;
- ASSERT(q_iop);
- update_q2i(q_iop, tdelta(q_iop, im_iop));
+ dip_foreach_list(im_iop, IOP_Q, &head);
+ list_for_each_safe(p, q, &head) {
+ q_iop = list_entry(p, struct io, f_head);
+ LIST_DEL(&q_iop->f_head);
- __link(q_iop, im_iop);
- run_queue(q_iop, top, del_head);
- __unlink(q_iop, im_iop);
+ if (ready_queue(q_iop, c_iop)) {
+ update_q2i(q_iop, tdelta(q_iop, im_iop));
- dump_iop(per_io_ofp, im_iop, NULL, 0);
- list_add_tail(&im_iop->f_head, del_head);
+ bilink(q_iop, im_iop);
+ dip_rem(q_iop);
+
+ xfer = min(im_iop->bytes_left,
+ q_iop->bytes_left);
+ im_iop->bytes_left -= xfer;
+ q_iop->bytes_left -= xfer;
+
+ if (q_iop->bytes_left == 0)
+ dip_rem(q_iop);
+ }
+ }
+ }
+
+ return im_iop->bytes_left == 0;
}
-void run_unim(struct io *im_iop, struct list_head *del_head)
+void trace_insert(struct io *i_iop)
{
- struct io *q_iop = dip_find_sec(im_iop->dip, IOP_Q, BIT_START(im_iop));
+ if (io_setup(i_iop, IOP_I))
+ iostat_insert(i_iop);
+ else
+ io_release(i_iop);
- __link(q_iop, im_iop);
- run_unqueue(q_iop, del_head);
- __unlink(q_iop, im_iop);
+}
- list_add_tail(&im_iop->f_head, del_head);
+void trace_merge(struct io *m_iop)
+{
+ if (io_setup(m_iop, IOP_M))
+ iostat_merge(m_iop);
+ else
+ io_release(m_iop);
}
*/
#include "globals.h"
-void trace_issue(struct io *d_iop)
+struct params {
+ struct io *c_iop;
+ struct list_head *rmhd;
+};
+
+void __run_issue(struct io *im_iop, struct io *d_iop, void *param)
{
- if (!io_setup(d_iop, IOP_D)) {
- io_release(d_iop);
- return;
- }
+ struct params *p = param;
- if (seek_name)
- seeki_add(d_iop->dip->seek_handle, d_iop);
- iostat_issue(d_iop);
- d_iop->dip->n_ds++;
+ update_i2d(im_iop, tdelta(im_iop, d_iop));
+ run_im(im_iop, p->c_iop, p->rmhd);
+ dump_iop(d_iop, 0);
+ list_add_tail(&d_iop->f_head, p->rmhd);
}
-int ready_issue(struct io *d_iop, struct io *top)
+void __run_unissue(struct io *im_iop, struct io *d_iop, void *param)
{
- LIST_HEAD(head);
- struct io *im_iop;
- struct list_head *p, *q;
- __u64 bl = d_iop->bytes_left;
-
- dip_foreach_list(d_iop, IOP_I, &head);
- dip_foreach_list(d_iop, IOP_M, &head);
- list_for_each_safe(p, q, &head) {
- im_iop = list_entry(p, struct io, f_head);
- LIST_DEL(&im_iop->f_head);
+ struct params *p = param;
- if (!ready_im(im_iop, top))
- return 0;
-
- bl -= im_iop->bytes_left;
- }
-
- return bl == 0;
+ unupdate_i2d(im_iop, tdelta(im_iop, d_iop));
+ run_unim(im_iop, p->rmhd);
+ list_add_tail(&d_iop->f_head, p->rmhd);
}
-void run_issue(struct io *d_iop, struct io *top, struct list_head *del_head)
+void run_issue(struct io *d_iop, struct io *c_iop, void *param)
{
- LIST_HEAD(head);
- struct list_head *p, *q;
- struct io *im_iop;
-
- dip_foreach_list(d_iop, IOP_I, &head);
- dip_foreach_list(d_iop, IOP_M, &head);
- list_for_each_safe(p, q, &head) {
- im_iop = list_entry(p, struct io, f_head);
- LIST_DEL(&im_iop->f_head);
+ struct params p = {
+ .c_iop = c_iop,
+ .rmhd = (struct list_head *)param
+ };
+ bilink_for_each_down(__run_issue, d_iop, &p, 1);
+}
- update_i2d(im_iop, tdelta(im_iop, d_iop));
+void run_unissue(struct io *d_iop, struct list_head *rmhd)
+{
+ struct params p = {
+ .c_iop = NULL,
+ .rmhd = rmhd
+ };
+ bilink_for_each_down(__run_unissue, d_iop, &p, 1);
+}
- __link(im_iop, d_iop);
- run_im(im_iop, top, del_head);
- __unlink(im_iop, d_iop);
+int ready_issue(struct io *d_iop, struct io *c_iop)
+{
+ if (d_iop->bytes_left > 0) {
+ LIST_HEAD(head);
+ struct io *im_iop;
+ struct list_head *p, *q;
+
+ dip_foreach_list(d_iop, IOP_I, &head);
+ dip_foreach_list(d_iop, IOP_M, &head);
+ list_for_each_safe(p, q, &head) {
+ im_iop = list_entry(p, struct io, f_head);
+ LIST_DEL(&im_iop->f_head);
+
+ if (ready_im(im_iop, c_iop)) {
+ ASSERT(d_iop->bytes_left >= im_iop->t.bytes);
+ bilink(im_iop, d_iop);
+ dip_rem(im_iop);
+ d_iop->bytes_left -= im_iop->t.bytes;
+ }
+ }
}
- dump_iop(per_io_ofp, d_iop, NULL, 0);
- list_add_tail(&d_iop->f_head, del_head);
+ return d_iop->bytes_left == 0;
}
-void run_unissue(struct io *d_iop, struct list_head *del_head)
+void trace_issue(struct io *d_iop)
{
- LIST_HEAD(head);
- struct io *im_iop;
- struct list_head *p, *q;
-
- iostat_unissue(d_iop);
-
- dip_foreach_list(d_iop, IOP_I, &head);
- dip_foreach_list(d_iop, IOP_M, &head);
- list_for_each_safe(p, q, &head) {
- im_iop = list_entry(p, struct io, f_head);
- LIST_DEL(&im_iop->f_head);
-
- __link(im_iop, d_iop);
- unupdate_i2d(im_iop, tdelta(im_iop, d_iop));
- run_unim(im_iop, del_head);
- __unlink(im_iop, d_iop);
+ if (io_setup(d_iop, IOP_D)) {
+ if (seek_name)
+ seeki_add(d_iop->dip->seek_handle, d_iop);
+ iostat_issue(d_iop);
+ d_iop->dip->n_ds++;
}
+ else
+ io_release(d_iop);
- list_add_tail(&d_iop->f_head, del_head);
}
*/
#include "globals.h"
-void trace_queue(struct io *q_iop)
+static inline void __update_q2c(struct io *q_iop, struct io *c_iop)
{
- if (!io_setup(q_iop, IOP_Q)) {
- io_release(q_iop);
- return;
- }
+ __u64 q2c = tdelta(q_iop, c_iop);
- update_lq(&last_q, &all_avgs.q2q, q_iop->t.time);
- update_qregion(&all_regions, q_iop->t.time);
- dip_update_q(q_iop->dip, q_iop);
- pip_update_q(q_iop);
+ update_q2c(q_iop, q2c);
+ latency_q2c(q_iop->dip, q_iop->t.time, q2c);
}
-int ready_queue(struct io *q_iop, struct io *top)
+void run_queue(struct io *q_iop, struct io *c_iop, struct list_head *rmhd)
{
- struct io *a_iop = dip_find_sec(q_iop->dip, IOP_A, BIT_START(q_iop));
+ struct bilink *blp;
+ struct io *a_iop = bilink_first_down(q_iop, &blp);
if (a_iop) {
- ASSERT(a_iop->bytes_left == q_iop->bytes_left);
- return ready_remap(a_iop, top);
+ run_remap(a_iop, c_iop, rmhd);
+ biunlink(blp);
}
- return q_iop->t.device == top->t.device &&
- BIT_START(top) <= BIT_START(q_iop) &&
- BIT_END(q_iop) <= BIT_END(top);
+ __update_q2c(q_iop, c_iop);
+ dump_iop(q_iop, 0);
+
+ list_add_tail(&q_iop->f_head, rmhd);
}
-void run_queue(struct io *q_iop, struct io *top, struct list_head *del_head)
+int ready_queue(struct io *q_iop, struct io *c_iop)
{
- struct io *iop;
- struct io *a_iop = dip_find_sec(q_iop->dip, IOP_A, BIT_START(q_iop));
-
- if (a_iop) {
- __link(a_iop, q_iop);
- run_remap(a_iop, top, del_head);
- __unlink(a_iop, q_iop);
- }
+ struct io *a_iop;
- for (iop = q_iop; iop != NULL; iop = list_first_up(iop)) {
- if (iop->type == IOP_C && iop->t.device == q_iop->t.device) {
- __u64 q2c = tdelta(q_iop, iop);
+ if (!list_empty(&q_iop->down_list))
+ return 1;
- update_q2c(q_iop, q2c);
- latency_q2c(q_iop->dip, q_iop->t.time, q2c);
+ a_iop = dip_find_sec(q_iop->dip, IOP_A, BIT_START(q_iop));
+ if (!a_iop)
+ return 1;
- dump_iop(per_io_ofp, q_iop, NULL,
- (q_iop->t.device == top->t.device) ? -4 : 0);
+ if (!ready_remap(a_iop, c_iop))
+ return 0;
- break;
- }
- }
-
- iop = list_first_up(q_iop);
- q_iop->bytes_left -= iop->bytes_left;
- if (q_iop->bytes_left == 0)
- list_add_tail(&q_iop->f_head, del_head);
+ ASSERT(q_iop->t.bytes == a_iop->t.bytes);
+ bilink(a_iop, q_iop);
+ dip_rem(a_iop);
+ return 1;
}
-void run_unqueue(struct io *q_iop, struct list_head *del_head)
+void trace_queue(struct io *q_iop)
{
- struct io *a_iop = dip_find_sec(q_iop->dip, IOP_A, BIT_START(q_iop));
-
- if (a_iop) {
- __link(a_iop, q_iop);
- run_unremap(a_iop, del_head);
- __unlink(a_iop, q_iop);
+ if (io_setup(q_iop, IOP_Q)) {
+ update_lq(&last_q, &all_avgs.q2q, q_iop->t.time);
+ update_qregion(&all_regions, q_iop->t.time);
+ dip_update_q(q_iop->dip, q_iop);
+ pip_update_q(q_iop);
}
-
- list_add_tail(&q_iop->f_head, del_head);
+ else
+ io_release(q_iop);
}
*/
#include "globals.h"
-void trace_remap(struct io *a_iop)
+void run_remap(struct io *a_iop, struct io *c_iop, void *param)
{
- struct io *l_iop;
- struct blk_io_trace_remap *rp = a_iop->pdu;
- __u32 remap_dev = be32_to_cpu(rp->device);
- __u64 remap_sec = be64_to_cpu(rp->sector);
-
- if (!io_setup(a_iop, IOP_A)) {
- io_release(a_iop);
- return;
+ struct bilink *blp = blp, *blp2;
+ struct list_head *rmhd = param;
+ struct io *q_iop, *l_iop = bilink_first_down(a_iop, &blp);
+
+ ASSERT(l_iop);
+ q_iop = bilink_first_down(l_iop, &blp2);
+ if (q_iop) {
+ run_queue(q_iop, c_iop, rmhd);
+ biunlink(blp2);
}
- /*
- * Create a fake LINK trace
- */
- l_iop = io_alloc();
- memcpy(&l_iop->t, &a_iop->t, sizeof(a_iop->t));
- l_iop->t.device = remap_dev;
- l_iop->t.sector = remap_sec;
+ dump_iop2(a_iop, l_iop);
- if (!io_setup(l_iop, IOP_L)) {
- io_release(l_iop);
- io_release(a_iop);
- return;
- }
-
- __link(l_iop, a_iop);
- l_iop->self_remap = (MAJOR(a_iop->t.device) == MAJOR(remap_dev));
+ biunlink(blp);
+ list_add_tail(&l_iop->f_head, rmhd);
+ list_add_tail(&a_iop->f_head, rmhd);
}
-
-int ready_remap(struct io *a_iop, struct io *top)
+int ready_dev_remap(struct io *l_iop, struct io *c_iop)
{
- struct io *l_iop = list_first_down(a_iop);
- struct blk_io_trace_remap *rp = a_iop->pdu;
- __u64 remap_sec = be64_to_cpu(rp->sector);
+ struct io *q_iop;
- if (l_iop->self_remap) {
- struct io *a_iop = dip_find_sec(l_iop->dip, IOP_A, remap_sec);
- if (a_iop)
- return ready_remap(a_iop, top);
- }
- else {
- struct io *q_iop = dip_find_sec(l_iop->dip, IOP_Q, remap_sec);
- if (q_iop)
- return ready_queue(q_iop, top);
- }
+ if (!list_empty(&l_iop->down_list))
+ return 1;
+
+ q_iop = dip_find_sec(l_iop->dip, IOP_Q, l_iop->t.sector);
+ if (!q_iop || !ready_queue(q_iop, c_iop))
+ return 0;
+
+ ASSERT(l_iop->t.bytes <= q_iop->t.bytes);
+ update_q2a(q_iop, tdelta(q_iop, l_iop));
+ bilink(q_iop, l_iop);
- return 0;
+ q_iop->bytes_left -= l_iop->t.bytes;
+ if (q_iop->bytes_left == 0)
+ dip_rem(q_iop);
+ return 1;
}
-void run_remap(struct io *a_iop, struct io *top, struct list_head *del_head)
+int ready_self_remap(struct io *l_iop)
{
- struct io *l_iop = list_first_down(a_iop);
- struct blk_io_trace_remap *rp = a_iop->pdu;
- __u64 remap_sec = be64_to_cpu(rp->sector);
-
- if (l_iop->self_remap) {
- struct io *a2_iop = dip_find_sec(l_iop->dip, IOP_A, remap_sec);
- ASSERT(a2_iop);
- __link(a2_iop, l_iop);
- run_remap(a2_iop, top, del_head);
- __unlink(a2_iop, l_iop);
- }
- else {
- struct io *q_iop = dip_find_sec(l_iop->dip, IOP_Q, remap_sec);
- ASSERT(q_iop);
- __link(q_iop, l_iop);
- update_q2a(q_iop, tdelta(q_iop, a_iop));
- run_queue(q_iop, top, del_head);
- __unlink(q_iop, l_iop);
+ struct io *a_iop = dip_find_sec(l_iop->dip, IOP_A, l_iop->t.sector);
+
+ if (a_iop) {
+ update_q2a(a_iop, tdelta(a_iop, l_iop));
+ dip_rem(a_iop);
}
- dump_iop(per_io_ofp, a_iop, l_iop, 0);
+ return 1;
+}
+
+int ready_remap(struct io *a_iop, struct io *c_iop)
+{
+ struct io *l_iop = bilink_first_down(a_iop, NULL);
- __unlink(l_iop, a_iop);
- list_add_tail(&l_iop->f_head, del_head);
- list_add_tail(&a_iop->f_head, del_head);
+ ASSERT(l_iop);
+ if (remapper_dev(l_iop->t.device))
+ return ready_dev_remap(l_iop, c_iop);
+ else
+ return ready_self_remap(l_iop);
}
-void run_unremap(struct io *a_iop, struct list_head *del_head)
+void trace_remap(struct io *a_iop)
{
- struct io *l_iop = list_first_down(a_iop);
+ struct io *l_iop;
struct blk_io_trace_remap *rp = a_iop->pdu;
- __u64 remap_sec = be64_to_cpu(rp->sector);
- if (l_iop->self_remap) {
- struct io *a_iop = dip_find_sec(l_iop->dip, IOP_A, remap_sec);
- __link(a_iop, l_iop);
- run_unremap(a_iop, del_head);
- __unlink(a_iop, l_iop);
+ a_iop->t.device = be32_to_cpu(rp->device_from);
+ if (!io_setup(a_iop, IOP_A)) {
+ io_release(a_iop);
+ return;
+ }
+
+ l_iop = io_alloc();
+ memcpy(&l_iop->t, &a_iop->t, sizeof(a_iop->t));
+ if (l_iop->t.pdu_len) {
+ l_iop->pdu = malloc(l_iop->t.pdu_len);
+ memcpy(l_iop->pdu, a_iop->pdu, l_iop->t.pdu_len);
}
- else {
- struct io *q_iop = dip_find_sec(l_iop->dip, IOP_Q, remap_sec);
- __link(q_iop, l_iop);
- run_unqueue(q_iop, del_head);
- __unlink(q_iop, l_iop);
+
+ l_iop->t.device = be32_to_cpu(rp->device);
+ l_iop->t.sector = be64_to_cpu(rp->sector);
+ if (!io_setup(l_iop, IOP_L)) {
+ io_release(l_iop);
+ io_release(a_iop);
+ return;
}
- __unlink(l_iop, a_iop);
- list_add_tail(&l_iop->f_head, del_head);
- list_add_tail(&a_iop->f_head, del_head);
+ bilink(l_iop, a_iop);
}
void trace_requeue(struct io *r_iop)
{
- if (!io_setup(r_iop, IOP_R)) {
+ LIST_HEAD(rmhd);
+ struct io *d_iop;
+
+ if ((io_setup(r_iop, IOP_R) == 0) ||
+ (d_iop = dip_find_sec(r_iop->dip, IOP_D,
+ BIT_START(r_iop))) == NULL) {
io_release(r_iop);
return;
}
+ dip_rem(d_iop);
- if (ready_requeue(r_iop, r_iop))
- run_requeue(r_iop);
- else
- add_retry(r_iop);
-}
-
-int retry_requeue(struct io *r_iop)
-{
- if (!ready_requeue(r_iop, r_iop))
- return 0;
-
- run_requeue(r_iop);
- return 1;
-}
-
-int ready_requeue(struct io *r_iop, struct io *top)
-{
- struct io *d_iop = dip_find_sec(r_iop->dip, IOP_D, BIT_START(r_iop));
- if (d_iop)
- return ready_issue(d_iop, top);
- return 0;
-}
-
-void run_requeue(struct io *r_iop)
-{
- LIST_HEAD(del_head);
- struct io *d_iop = dip_find_sec(r_iop->dip, IOP_D, BIT_START(r_iop));
-
- __link(d_iop, r_iop);
- run_unissue(d_iop, &del_head);
- __unlink(d_iop, r_iop);
+#if defined(DEBUG)
+ ASSERT(ready_issue(d_iop, r_iop) != 0);
+#else
+ (void)ready_issue(d_iop, r_iop);
+#endif
- del_retry(r_iop);
- list_add_tail(&r_iop->f_head, &del_head);
- release_iops(&del_head);
+ run_unissue(d_iop, &rmhd);
+ list_add_tail(&r_iop->f_head, &rmhd);
+ release_iops(&rmhd);
}