path: root/btt/trace_complete.c
author    Alan D. Brunelle <Alan.Brunelle@hp.com>  2007-09-10 18:37:49 +0200
committer Jens Axboe <jens.axboe@oracle.com>       2007-09-10 18:37:49 +0200
commit    4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2 (patch)
tree      97a01448fa090ed0aacf979e640bf77e40b637bf /btt/trace_complete.c
parent    a5981e2e795319502cc8dda629482928f3b7b204 (diff)
download  blktrace-4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2.tar.gz
          blktrace-4c48f14ea8ae2fae86811ac4dc1d72ad9bb601c2.tar.bz2
Major revamping (ver 2.0)
After a lot of fighting with maintaining a tree-styled design (each trace having its own node), it was just getting too cumbersome to work in all circumstances. Taking a clue from blkparse itself, I decided to just keep track of IOs at queue time, updating fields based upon later traces. The attached (large) patch works much faster, handles larger test cases with fewer failures, and is managing some pretty large jobs I'm working on (large Oracle-based DB analysis - 32-way box w/ lots of storage).

I've also added a Q2Q seek distance feature - it's come in handy when comparing results of IO scheduler choice: we can see what the incoming IO seek distances are (at queue time), and then see how the scheduler itself manages things (via merges & sorting) by looking at the D2D seek distances generated.

As noted in the subject, I arbitrarily bumped this to version 2.00, as the innards are so different. The documentation (btt/doc/btt.tex) has been updated to reflect some minor output changes.

I also fixed a bug dealing with process name notification: if a new PID came up with a name that was previously seen, btt wouldn't keep track of it correctly. [When running with Oracle, a lot of processes have the same name but different PIDs, of course.]

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
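To make the Q2Q idea above concrete, here is a minimal sketch of queue-time seek-distance bookkeeping. All names here (q2q_state, q2q_seek_distance) are hypothetical illustrations, not btt's actual internals:

/*
 * Sketch only: per-device state for Q2Q seek distances.  btt's real
 * implementation keeps equivalent state in its own device structures.
 */
#include <stdint.h>

struct q2q_state {
	uint64_t last_end;	/* sector just past the previous queued IO */
	int seen_one;		/* set once the first Q trace is recorded */
};

/*
 * Called once per queue (Q) trace: returns the signed distance (in
 * sectors) from where the previous queued IO ended to where this one
 * starts, then remembers this IO's end for the next call.
 */
static int64_t q2q_seek_distance(struct q2q_state *st,
				 uint64_t sector, uint32_t nsectors)
{
	int64_t dist = st->seen_one
		? (int64_t)sector - (int64_t)st->last_end : 0;

	st->last_end = sector + nsectors;
	st->seen_one = 1;
	return dist;
}

Running the same calculation over issue (D) traces yields the D2D distances, so comparing the two distributions shows how much reordering and merging the IO scheduler performed.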
Diffstat (limited to 'btt/trace_complete.c')
-rw-r--r--  btt/trace_complete.c  192
1 file changed, 47 insertions, 145 deletions
diff --git a/btt/trace_complete.c b/btt/trace_complete.c
index b48bf79..c51d43d 100644
--- a/btt/trace_complete.c
+++ b/btt/trace_complete.c
@@ -20,170 +20,72 @@
*/
#include "globals.h"
-LIST_HEAD(pending_cs);
-
-static inline void __run_complete(struct io *c_iop)
+static inline void __out(FILE *ofp, __u64 tm, enum iop_type type,
+ __u64 sec, __u32 nsec, int indent)
{
- if (remapper_dev(c_iop->t.device)) {
- struct bilink *blp = blp;
- struct io *iop = bilink_first_down(c_iop, &blp);
-
- if (iop->type == IOP_Q) {
- run_queue(iop, c_iop, c_iop);
- biunlink(blp);
- }
- else
- bilink_for_each_down(run_remap, c_iop, c_iop, 1);
+ if (tm != (__u64)-1) {
+ if (indent)
+ fprintf(ofp, " ");
+ fprintf(ofp, "%5d.%09lu %c %10llu+%-4u\n",
+ (int)SECONDS(tm), (unsigned long)NANO_SECONDS(tm),
+ type2c(type), (unsigned long long)sec, nsec);
}
- else
- bilink_for_each_down(run_issue, c_iop, c_iop, 1);
-
- dump_iop(c_iop, 1);
-
- LIST_DEL(&c_iop->c_pending);
- del_retry(c_iop);
- add_rmhd(c_iop);
+}
- release_iops();
+static void display_io_track(FILE *ofp, struct io *iop)
+{
+ fprintf(ofp, "%3d,%-3d: ", MAJOR(iop->t.device), MINOR(iop->t.device));
+ __out(ofp, iop->t.time, IOP_Q, iop->t.sector, t_sec(&iop->t), 0);
+ __out(ofp, iop->i_time, IOP_I, iop->t.sector, t_sec(&iop->t), 1);
+ __out(ofp, iop->gm_time, iop->is_getrq ? IOP_G : IOP_M,
+ iop->t.sector, t_sec(&iop->t), 1);
+ __out(ofp, iop->d_time, IOP_D, iop->d_sec, iop->d_nsec, 1);
+ __out(ofp, iop->c_time, IOP_C, iop->c_sec, iop->c_nsec, 1);
+ fprintf(ofp, "\n");
}
-static int ready_complete_remapper(struct io *c_iop)
+static void handle_complete(struct io *c_iop)
{
LIST_HEAD(head);
struct list_head *p, *q;
- dip_foreach_list(c_iop, IOP_L, &head);
- if (list_empty(&head)) {
- struct io *q_iop;
-
- dip_foreach_list(c_iop, IOP_Q, &head);
- list_for_each_safe(p, q, &head) {
- q_iop = list_entry(p, struct io, f_head);
- LIST_DEL(&q_iop->f_head);
+ update_blks(c_iop);
+ update_cregion(&all_regions, c_iop->t.time);
+ update_cregion(&c_iop->dip->regions, c_iop->t.time);
+ if (c_iop->pip)
+ update_cregion(&c_iop->pip->regions, c_iop->t.time);
- ASSERT(list_empty(&q_iop->up_list));
- ASSERT(list_empty(&q_iop->down_list));
- ASSERT(q_iop->t.bytes == c_iop->t.bytes);
- if (ready_queue(q_iop, c_iop)) {
- dip_rem(q_iop);
- bilink(q_iop, c_iop);
- c_iop->bytes_left -= q_iop->t.bytes;
- }
- }
- }
- else {
- struct io *l_iop, *a_iop;
-
- list_for_each_safe(p, q, &head) {
- l_iop = list_entry(p, struct io, f_head);
- LIST_DEL(&l_iop->f_head);
-
- ASSERT(!list_empty(&l_iop->up_list));
- a_iop = bilink_first_up(l_iop, NULL);
- if (ready_remap(a_iop, c_iop)) {
- dip_rem(l_iop);
- bilink(a_iop, c_iop);
- c_iop->bytes_left -= a_iop->t.bytes;
- }
- }
- }
+ dip_foreach_list(c_iop, IOP_Q, &head);
+ list_for_each_safe(p, q, &head) {
+ struct io *q_iop = list_entry(p, struct io, f_head);
+ __u64 q2c = tdelta(q_iop->t.time, c_iop->t.time);
+ __u64 d2c = tdelta(q_iop->d_time, c_iop->t.time);
- return c_iop->bytes_left == 0;
-}
+ c_iop->bytes_left -= q_iop->t.bytes;
-int ready_complete(struct io *c_iop)
-{
- __u64 d2c;
- struct io *d_iop;
+ update_q2c(q_iop, q2c);
+ latency_q2c(q_iop->dip, q_iop->t.time, q2c);
- if (c_iop->bytes_left == 0)
- return 1;
+ update_d2c(q_iop, d2c);
+ latency_d2c(q_iop->dip, c_iop->t.time, d2c);
+ iostat_complete(q_iop, c_iop);
- if (remapper_dev(c_iop->t.device))
- return ready_complete_remapper(c_iop);
-
- if (!list_empty(&c_iop->down_list))
- return 1;
-
- d_iop = dip_find_sec(c_iop->dip, IOP_D, BIT_START(c_iop));
- if (!d_iop)
- return -1;
+ if (per_io_ofp) {
+ q_iop->c_time = c_iop->t.time;
+ q_iop->c_sec = c_iop->t.sector;
+ q_iop->c_nsec = t_sec(&c_iop->t);
+ display_io_track(per_io_ofp, q_iop);
+ }
- if (c_iop->t.bytes != d_iop->t.bytes) {
- fprintf(stderr,
- "\nFATAL: Probable time anomaly detected\n");
- fprintf(stderr,
- "D @ %15.9lf missing C, later C @ %15.9lf\n",
- BIT_TIME(d_iop->t.time),
- BIT_TIME(c_iop->t.time));
- exit(1);
+ LIST_DEL(&q_iop->f_head);
+ io_release(q_iop);
}
-
- if (!ready_issue(d_iop, c_iop))
- return 0;
-
- c_iop->bytes_left = 0;
-
- d2c = tdelta(d_iop, c_iop);
- update_d2c(d_iop, d_iop->down_len, d2c);
- latency_d2c(d_iop->dip, c_iop->t.time, d2c);
- iostat_complete(d_iop, c_iop);
-
- bilink(d_iop, c_iop);
- dip_rem(d_iop);
- return 1;
}
void trace_complete(struct io *c_iop)
{
- if (io_setup(c_iop, IOP_C)) {
- update_blks(c_iop);
- update_cregion(&all_regions, c_iop->t.time);
- update_cregion(&c_iop->dip->regions, c_iop->t.time);
- if (c_iop->pip)
- update_cregion(&c_iop->pip->regions, c_iop->t.time);
+ if (io_setup(c_iop, IOP_C))
+ handle_complete(c_iop);
- list_add_tail(&c_iop->c_pending, &pending_cs);
- switch (ready_complete(c_iop)) {
- case 1:
- __run_complete(c_iop);
- break;
- case 0:
- add_retry(c_iop);
- break;
- case -1:
- LIST_DEL(&c_iop->c_pending);
- del_retry(c_iop);
- io_release(c_iop);
- break;
- }
- }
- else
- io_release(c_iop);
-}
-
-void retry_complete(struct io *c_iop, __u64 now)
-{
- double tc = BIT_TIME(c_iop->t.time);
-
- switch (ready_complete(c_iop)) {
- case 1:
-# if defined(DEBUG)
- fprintf(stderr, "Retried %15.9lf success!\n", tc);
-# endif
-
- __run_complete(c_iop);
- break;
- case 0:
- if (now == 0 || ((BIT_TIME(now) - tc) < 1.0))
- break;
- if (!list_empty(&c_iop->down_list))
- break;
- /*FALLTHROUGH*/
- case -1:
- LIST_DEL(&c_iop->c_pending);
- del_retry(c_iop);
- io_release(c_iop);
- break;
- }
+ io_release(c_iop);
}
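For reference, with per-IO output enabled, display_io_track() above emits one block per IO: the device major,minor followed by one timestamped line per trace stage (Q, I, G or M, D, C). The values below are invented for illustration and the spacing is approximate:

  8,0  :     0.000123456 Q  100200648+8
             0.000123901 I  100200648+8
             0.000124210 G  100200648+8
             0.000131442 D  100200648+8
             0.000912345 C  100200648+8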