diff options
author | Alan D. Brunelle <Alan.Brunelle@hp.com> | 2007-09-10 18:37:49 +0200 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2007-09-10 18:37:49 +0200 |
commit | 4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2 (patch) | |
tree | 97a01448fa090ed0aacf979e640bf77e40b637bf /btt/trace_complete.c | |
parent | a5981e2e795319502cc8dda629482928f3b7b204 (diff) | |
download | blktrace-4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2.tar.gz blktrace-4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2.tar.bz2 |
Major revamping (ver 2.0)
After a lot of fighting with maintaining a tree-styled design (each trace
having its own node), it was just getting too cumbersome to work in all
circumstances. Taking a clue from blkparse itself, I decided to just keep
track of IOs at queue time, and update fields based upon later traces.
The attached (large) patch works much faster, handles larger test cases
with fewer failures, and is managing some pretty large jobs I'm working on
(large Oracle-based DB analysis - 32-way box w/ lots of storage).
I've also added a Q2Q seek distance feature - it's come in handy when
comparing results of IO scheduler choice: We can see what the incoming IO
seek distances are (at queue time), and then see how the scheduler itself
manages things (via merges & sorting) by looking at D2D seek distances
generated.
As noted in the subject, I arbitrarily bumped this to version 2.00 as the
innards are so different. The documentation (btt/doc/btt.tex) has been
updated to reflect some minor output changes. I also fixed a bug dealing
with process name notification: there was a problem that if a new PID came
up with a name that was previously seen, btt wouldn't keep track of it
right. [When running with Oracle, a lot of processes have the same name but
different PIDs of course.]
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'btt/trace_complete.c')
-rw-r--r-- | btt/trace_complete.c | 192 |
1 files changed, 47 insertions, 145 deletions
diff --git a/btt/trace_complete.c b/btt/trace_complete.c index b48bf79..c51d43d 100644 --- a/btt/trace_complete.c +++ b/btt/trace_complete.c @@ -20,170 +20,72 @@ */ #include "globals.h" -LIST_HEAD(pending_cs); - -static inline void __run_complete(struct io *c_iop) +static inline void __out(FILE *ofp, __u64 tm, enum iop_type type, + __u64 sec, __u32 nsec, int indent) { - if (remapper_dev(c_iop->t.device)) { - struct bilink *blp = blp; - struct io *iop = bilink_first_down(c_iop, &blp); - - if (iop->type == IOP_Q) { - run_queue(iop, c_iop, c_iop); - biunlink(blp); - } - else - bilink_for_each_down(run_remap, c_iop, c_iop, 1); + if (tm != (__u64)-1) { + if (indent) + fprintf(ofp, " "); + fprintf(ofp, "%5d.%09lu %c %10llu+%-4u\n", + (int)SECONDS(tm), (unsigned long)NANO_SECONDS(tm), + type2c(type), (unsigned long long)sec, nsec); } - else - bilink_for_each_down(run_issue, c_iop, c_iop, 1); - - dump_iop(c_iop, 1); - - LIST_DEL(&c_iop->c_pending); - del_retry(c_iop); - add_rmhd(c_iop); +} - release_iops(); +static void display_io_track(FILE *ofp, struct io *iop) +{ + fprintf(ofp, "%3d,%-3d: ", MAJOR(iop->t.device), MINOR(iop->t.device)); + __out(ofp, iop->t.time, IOP_Q, iop->t.sector, t_sec(&iop->t), 0); + __out(ofp, iop->i_time, IOP_I, iop->t.sector, t_sec(&iop->t), 1); + __out(ofp, iop->gm_time, iop->is_getrq ? 
IOP_G : IOP_M, + iop->t.sector, t_sec(&iop->t), 1); + __out(ofp, iop->d_time, IOP_D, iop->d_sec, iop->d_nsec, 1); + __out(ofp, iop->c_time, IOP_C, iop->c_sec, iop->c_nsec, 1); + fprintf(ofp, "\n"); } -static int ready_complete_remapper(struct io *c_iop) +static void handle_complete(struct io *c_iop) { LIST_HEAD(head); struct list_head *p, *q; - dip_foreach_list(c_iop, IOP_L, &head); - if (list_empty(&head)) { - struct io *q_iop; - - dip_foreach_list(c_iop, IOP_Q, &head); - list_for_each_safe(p, q, &head) { - q_iop = list_entry(p, struct io, f_head); - LIST_DEL(&q_iop->f_head); + update_blks(c_iop); + update_cregion(&all_regions, c_iop->t.time); + update_cregion(&c_iop->dip->regions, c_iop->t.time); + if (c_iop->pip) + update_cregion(&c_iop->pip->regions, c_iop->t.time); - ASSERT(list_empty(&q_iop->up_list)); - ASSERT(list_empty(&q_iop->down_list)); - ASSERT(q_iop->t.bytes == c_iop->t.bytes); - if (ready_queue(q_iop, c_iop)) { - dip_rem(q_iop); - bilink(q_iop, c_iop); - c_iop->bytes_left -= q_iop->t.bytes; - } - } - } - else { - struct io *l_iop, *a_iop; - - list_for_each_safe(p, q, &head) { - l_iop = list_entry(p, struct io, f_head); - LIST_DEL(&l_iop->f_head); - - ASSERT(!list_empty(&l_iop->up_list)); - a_iop = bilink_first_up(l_iop, NULL); - if (ready_remap(a_iop, c_iop)) { - dip_rem(l_iop); - bilink(a_iop, c_iop); - c_iop->bytes_left -= a_iop->t.bytes; - } - } - } + dip_foreach_list(c_iop, IOP_Q, &head); + list_for_each_safe(p, q, &head) { + struct io *q_iop = list_entry(p, struct io, f_head); + __u64 q2c = tdelta(q_iop->t.time, c_iop->t.time); + __u64 d2c = tdelta(q_iop->d_time, c_iop->t.time); - return c_iop->bytes_left == 0; -} + c_iop->bytes_left -= q_iop->t.bytes; -int ready_complete(struct io *c_iop) -{ - __u64 d2c; - struct io *d_iop; + update_q2c(q_iop, q2c); + latency_q2c(q_iop->dip, q_iop->t.time, q2c); - if (c_iop->bytes_left == 0) - return 1; + update_d2c(q_iop, d2c); + latency_d2c(q_iop->dip, c_iop->t.time, d2c); + iostat_complete(q_iop, c_iop); - 
if (remapper_dev(c_iop->t.device)) - return ready_complete_remapper(c_iop); - - if (!list_empty(&c_iop->down_list)) - return 1; - - d_iop = dip_find_sec(c_iop->dip, IOP_D, BIT_START(c_iop)); - if (!d_iop) - return -1; + if (per_io_ofp) { + q_iop->c_time = c_iop->t.time; + q_iop->c_sec = c_iop->t.sector; + q_iop->c_nsec = t_sec(&c_iop->t); + display_io_track(per_io_ofp, q_iop); + } - if (c_iop->t.bytes != d_iop->t.bytes) { - fprintf(stderr, - "\nFATAL: Probable time anomaly detected\n"); - fprintf(stderr, - "D @ %15.9lf missing C, later C @ %15.9lf\n", - BIT_TIME(d_iop->t.time), - BIT_TIME(c_iop->t.time)); - exit(1); + LIST_DEL(&q_iop->f_head); + io_release(q_iop); } - - if (!ready_issue(d_iop, c_iop)) - return 0; - - c_iop->bytes_left = 0; - - d2c = tdelta(d_iop, c_iop); - update_d2c(d_iop, d_iop->down_len, d2c); - latency_d2c(d_iop->dip, c_iop->t.time, d2c); - iostat_complete(d_iop, c_iop); - - bilink(d_iop, c_iop); - dip_rem(d_iop); - return 1; } void trace_complete(struct io *c_iop) { - if (io_setup(c_iop, IOP_C)) { - update_blks(c_iop); - update_cregion(&all_regions, c_iop->t.time); - update_cregion(&c_iop->dip->regions, c_iop->t.time); - if (c_iop->pip) - update_cregion(&c_iop->pip->regions, c_iop->t.time); + if (io_setup(c_iop, IOP_C)) + handle_complete(c_iop); - list_add_tail(&c_iop->c_pending, &pending_cs); - switch (ready_complete(c_iop)) { - case 1: - __run_complete(c_iop); - break; - case 0: - add_retry(c_iop); - break; - case -1: - LIST_DEL(&c_iop->c_pending); - del_retry(c_iop); - io_release(c_iop); - break; - } - } - else - io_release(c_iop); -} - -void retry_complete(struct io *c_iop, __u64 now) -{ - double tc = BIT_TIME(c_iop->t.time); - - switch (ready_complete(c_iop)) { - case 1: -# if defined(DEBUG) - fprintf(stderr, "Retried %15.9lf success!\n", tc); -# endif - - __run_complete(c_iop); - break; - case 0: - if (now == 0 || ((BIT_TIME(now) - tc) < 1.0)) - break; - if (!list_empty(&c_iop->down_list)) - break; - /*FALLTHROUGH*/ - case -1: - 
LIST_DEL(&c_iop->c_pending); - del_retry(c_iop); - io_release(c_iop); - break; - } + io_release(c_iop); } |