author     Alan D. Brunelle <alan.brunelle@hp.com>   2007-02-06 20:46:16 +0100
committer  Jens Axboe <jens.axboe@oracle.com>        2007-02-06 20:46:16 +0100
commit     d76c5b81b99faca0959afcdd2e73330c61f69bfc (patch)
tree       cfb3df57c89960cd144fcf6437e617ea39f78aa4 /btt/inlines.h
parent     0ac4c20e46fc644b1ac9e3021e3ebaa88b9c536f (diff)
[PATCH]: btt - major fixes and speed improvements
From: Alan D. Brunelle <Alan.Brunelle@hp.com>

Lots of changes to how we handle traces - adds robustness & speed.

This large patch contains the following changes to the trace handling
aspects of btt:

1. Use larger buffers for output options.

2. Use mmap to handle the input of trace data.

3. More precise btt statistics are output at the end.

4. Added in (under DEBUG) the display of unhandled traces. I was
   running into the problem where traces were not being connected, and
   the rb trees would get quite large. This would slow things down
   considerably. (See below for details on why traces weren't being
   handled.)

5. Sprinkled some ASSERTs (under DEBUG).

6. Added a new btt-specific trace type: "links" - since 'A' (remaps)
   contain two separate pieces of information, I broke them up into a
   link and a remap trace. [Thus, it is easy to find either end of the
   remap.]

7. Added in the notion of retries of completes (and requeues). I'm
   finding some discrepancies in the time stamps; in order to make btt
   handle these better, I've added the notion of keeping the trace
   around for a bit, to see if it gets linked up later.

8. Separated trace streams into: simple IOs, and remapped IOs.

9. Fixed up D2C averages - Q2I + I2D + D2C should equal Q2C averages.

----------------------------------------------------------------------------

I do not understand why it is so, but I am seeing two 'C' (complete)
traces for the same IO track at times. The sequence number is different
(+1 for the second one), and the time stamps are different (100's of
microseconds apart). I'm investigating this.

At least on an IA64, I am seeing time inconsistencies amongst CPUs on
very heavy loads (48 disks, 4 CPUs, almost 300 million traces). I find
the 'D' (issue) and 'C' (complete) traces coming out ahead of the
associated 'I' (insert) and 'M' (merge) traces. It would be good to get
this fixed in the kernel, but I figure it is also worthwhile to attempt
to account for it in post-processing as well.

----------------------------------------------------------------------------

This work was done in order to handle some of these large data sets,
and I've found that the performance is reasonable - here are some stats
for very large files (the largest of which used to take well over 12
minutes, now it takes about 5 1/2 minutes - and a lot of that is just
getting the 18GiB of data read in):

    Size     Real      User      System
    -----    --------  --------  -------
     7GiB    123.445s   80.188s  11.392s
    10GiB    179.148s  137.456s  16.680s
    13GiB    237.561s  156.992s  21.968s
    16GiB    283.262s  187.468s  26.748s
    18GiB    336.345s  225.084s  31.200s

Signed-off-by: Alan D. Brunelle <Alan.Brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
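[Editor's note: the mmap-based input handling (point 2) lives outside
btt/inlines.h, so it does not appear in the diff below. As a rough,
hedged sketch of the technique only - map_tracefile() and its shape are
hypothetical, not btt's actual reader:

    /* Hedged sketch: map a trace file read-only so records can be
     * parsed straight out of the mapping instead of via read(2). */
    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static void *map_tracefile(const char *path, size_t *len)
    {
            struct stat st;
            void *base;
            int fd = open(path, O_RDONLY);

            if (fd < 0)
                    return NULL;
            if (fstat(fd, &st) < 0) {
                    close(fd);
                    return NULL;
            }
            *len = st.st_size;
            base = mmap(NULL, *len, PROT_READ, MAP_PRIVATE, fd, 0);
            close(fd);      /* the mapping outlives the descriptor */
            return (base == MAP_FAILED) ? NULL : base;
    }

Parsing from a read-only mapping lets the kernel page the 18GiB trace
files in on demand rather than copying them through read(2) buffers.]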
Diffstat (limited to 'btt/inlines.h')
-rw-r--r--  btt/inlines.h  |  214
1 file changed, 184 insertions(+), 30 deletions(-)
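[Editor's note: point 9 of the commit message is carried by a new
weighted helper, avg_update_n(), added in the first hunk below. When a
single 'C' trace completes n merged requests, its d2c delta is credited
n times, so summed per-trace averages still satisfy Q2I + I2D + D2C =
Q2C. A minimal usage sketch, assuming the n/min/max/total fields of
struct avg_info from btt/globals.h (not shown in this diff):

    struct avg_info d2c_avg = { 0 };

    /* One completion covering n == 3 merged traces, each with a
     * driver-to-complete delta of 100ns: */
    avg_update_n(&d2c_avg, 100, 3);

    /* d2c_avg.total == 300 and d2c_avg.n == 3, so the mean is still
     * 100 - but all three traces are counted, matching the three
     * q2i/i2d updates they each received. */
]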
diff --git a/btt/inlines.h b/btt/inlines.h
index cfc7160..c93fee4 100644
--- a/btt/inlines.h
+++ b/btt/inlines.h
@@ -74,6 +74,23 @@ static inline void avg_update(struct avg_info *ap, __u64 t)
}
}
+static inline void avg_update_n(struct avg_info *ap, __u64 t, int n)
+{
+ if (ap->n == 0) {
+ ap->min = ap->max = t;
+ ap->total = (n * t);
+ }
+ else {
+ if (t < ap->min)
+ ap->min = t;
+ else if (t > ap->max)
+ ap->max = t;
+ ap->total += (n * t);
+ }
+
+ ap->n += n;
+}
+
static inline void avg_unupdate(struct avg_info *ap, __u64 t)
{
ap->n--;
@@ -105,6 +122,14 @@ static inline struct io *io_alloc(void)
iop = malloc(sizeof(struct io));
memset(iop, 0, sizeof(struct io));
+ INIT_LIST_HEAD(&iop->down_list);
+ INIT_LIST_HEAD(&iop->up_list);
+
+#if defined(DEBUG)
+ iop->f_head.next = LIST_POISON1;
+ iop->c_pending.next = LIST_POISON1;
+ iop->retry.next = LIST_POISON1;
+#endif
return iop;
}
@@ -123,8 +148,6 @@ static inline int io_setup(struct io *iop, enum iop_type type)
iop->dip = dip_add(iop->t.device, iop);
if (iop->linked) {
iop->pip = find_process(iop->t.pid, NULL);
- INIT_LIST_HEAD(&iop->down_list);
- INIT_LIST_HEAD(&iop->up_list);
iop->bytes_left = iop->t.bytes;
}
@@ -133,10 +156,17 @@ static inline int io_setup(struct io *iop, enum iop_type type)
static inline void io_release(struct io *iop)
{
+ ASSERT(iop->f_head.next == LIST_POISON1);
+ ASSERT(iop->c_pending.next == LIST_POISON1);
+ ASSERT(iop->retry.next == LIST_POISON1);
+ ASSERT(list_empty(&iop->up_list));
+ ASSERT(list_empty(&iop->down_list));
+
if (iop->linked)
dip_rem(iop);
if (iop->pdu)
free(iop->pdu);
+
io_free(iop);
}
@@ -146,6 +176,12 @@ static inline void io_release(struct io *iop)
if (_pip) avg_update(&_pip->avgs. _avg , _time); \
} while (0)
+#define UPDATE_AVGS_N(_avg, _iop, _pip, _time, _n) do { \
+ avg_update_n(&all_avgs. _avg , _time, _n); \
+ avg_update_n(&_iop->dip->avgs. _avg , _time, _n); \
+ if (_pip) avg_update_n(&_pip->avgs. _avg , _time, _n); \
+ } while (0)
+
#define UNUPDATE_AVGS(_avg, _iop, _pip, _time) do { \
avg_unupdate(&all_avgs. _avg , _time); \
avg_unupdate(&_iop->dip->avgs. _avg , _time); \
@@ -154,21 +190,38 @@ static inline void io_release(struct io *iop)
static inline void update_q2c(struct io *iop, __u64 c_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2c %13.9f\n", BIT_TIME(c_time));
+#endif
UPDATE_AVGS(q2c, iop, iop->pip, c_time);
}
static inline void update_q2a(struct io *iop, __u64 a_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2a %13.9f\n", BIT_TIME(a_time));
+#endif
UPDATE_AVGS(q2a, iop, iop->pip, a_time);
}
static inline void update_q2i(struct io *iop, __u64 i_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "q2i %13.9f\n", BIT_TIME(i_time));
+#endif
UPDATE_AVGS(q2i, iop, iop->pip, i_time);
}
+static inline void unupdate_q2i(struct io *iop, __u64 i_time)
+{
+ UNUPDATE_AVGS(q2i, iop, iop->pip, i_time);
+}
+
static inline void update_i2d(struct io *iop, __u64 d_time)
{
+#if defined(DEBUG)
+ if (per_io_ofp) fprintf(per_io_ofp, "i2d %13.9f\n", BIT_TIME(d_time));
+#endif
UPDATE_AVGS(i2d, iop, iop->pip, d_time);
}
@@ -177,9 +230,10 @@ static inline void unupdate_i2d(struct io *iop, __u64 d_time)
UNUPDATE_AVGS(i2d, iop, iop->pip, d_time);
}
-static inline void update_d2c(struct io *iop, __u64 c_time)
+static inline void update_d2c(struct io *iop, int n, __u64 c_time)
{
- UPDATE_AVGS(d2c, iop, iop->pip, c_time);
+ if (per_io_ofp) fprintf(per_io_ofp, "d2c %13.9f\n", n * BIT_TIME(c_time));
+ UPDATE_AVGS_N(d2c, iop, iop->pip, c_time, n);
}
static inline void update_blks(struct io *iop)
@@ -212,6 +266,9 @@ static inline int dip_rb_ins(struct d_info *dip, struct io *iop)
static inline void dip_rb_rem(struct io *iop)
{
rb_erase(&iop->rb_node, __get_root(iop->dip, iop->type));
+#if defined(DEBUG)
+ rb_tree_size--;
+#endif
}
static inline void dip_rb_fe(struct d_info *dip, enum iop_type type,
@@ -228,37 +285,17 @@ static inline struct io *dip_rb_find_sec(struct d_info *dip,
return rb_find_sec(__get_root(dip, type), sec);
}
-static inline struct io *list_first_down(struct io *iop)
+static inline void bump_retry(__u64 now)
{
- struct list_head *p = list_first(&iop->down_list);
- return p ? list_entry(p, struct io, up_head) : NULL;
-}
-
-static inline struct io *list_first_up(struct io *iop)
-{
- struct list_head *p = list_first(&iop->up_list);
- return p ? list_entry(p, struct io, down_head) : NULL;
-}
-
-static inline int list_empty_up(struct io *iop)
-{
- return list_empty(&iop->up_list);
-}
-
-static inline void __link(struct io *down_iop, struct io *up_iop)
-{
- list_add_tail(&down_iop->up_head, &up_iop->down_list);
- list_add_tail(&up_iop->down_head, &down_iop->up_list);
-}
-
-static inline void __unlink(struct io *down_iop, struct io *up_iop)
-{
- LIST_DEL(&down_iop->up_head);
- LIST_DEL(&up_iop->down_head);
+ if (!list_empty(&retries))
+ next_retry_check = now + (100 * 1000); // 100 usec
+ else
+ next_retry_check = 0;
}
static inline void add_retry(struct io *iop)
{
+ bump_retry(iop->t.time);
if (!iop->on_retry_list) {
list_add_tail(&iop->retry, &retries);
iop->on_retry_list = 1;
@@ -271,6 +308,7 @@ static inline void del_retry(struct io *iop)
LIST_DEL(&iop->retry);
iop->on_retry_list = 0;
}
+ bump_retry(iop->t.time);
}
static inline __u64 tdelta(struct io *iop1, struct io *iop2)
@@ -279,3 +317,119 @@ static inline __u64 tdelta(struct io *iop1, struct io *iop2)
__u64 t2 = iop2->t.time;
return (t1 < t2) ? (t2 - t1) : 1;
}
+
+static inline int remapper_dev(__u32 dev)
+{
+ int mjr = MAJOR(dev);
+ return mjr == 9 || mjr == 254; // 253? That's what is in blkparse.c
+ // But I see 254...
+}
+
+static inline void dump_iop(struct io *iop, int extra_nl)
+{
+ if (per_io_ofp)
+ __dump_iop(per_io_ofp, iop, extra_nl);
+}
+
+static inline void dump_iop2(struct io *a_iop, struct io *l_iop)
+{
+ if (per_io_ofp)
+ __dump_iop2(per_io_ofp, a_iop, l_iop);
+}
+
+static inline int type2c(enum iop_type type)
+{
+ int c;
+
+ switch (type) {
+ case IOP_Q: c = 'Q'; break;
+ case IOP_X: c = 'X'; break;
+ case IOP_A: c = 'A'; break;
+ case IOP_I: c = 'I'; break;
+ case IOP_M: c = 'M'; break;
+ case IOP_D: c = 'D'; break;
+ case IOP_C: c = 'C'; break;
+ case IOP_R: c = 'R'; break;
+ case IOP_L: c = 'L'; break;
+ default : c = '?'; break;
+ }
+
+ return c;
+}
+
+static inline void bilink_free(struct bilink *blp)
+{
+ free(blp);
+}
+
+static inline struct bilink *bilink_alloc(struct io *diop, struct io *uiop)
+{
+ struct bilink *blp = malloc(sizeof(*blp));
+
+ blp->diop = diop;
+ blp->uiop = uiop;
+
+ return blp;
+}
+
+static inline void bilink(struct io *diop, struct io *uiop)
+{
+ struct bilink *blp = bilink_alloc(diop, uiop);
+
+ list_add_tail(&blp->down_head, &diop->up_list);
+ list_add_tail(&blp->up_head, &uiop->down_list);
+
+ diop->up_len++;
+ uiop->down_len++;
+}
+
+static inline void biunlink(struct bilink *blp)
+{
+ LIST_DEL(&blp->down_head);
+ LIST_DEL(&blp->up_head);
+ blp->diop->up_len--;
+ blp->uiop->down_len--;
+ bilink_free(blp);
+}
+
+static inline struct io *bilink_first_down(struct io *iop,
+ struct bilink **blp_p)
+{
+ struct bilink *blp;
+
+ if (list_empty(&iop->down_list))
+ return NULL;
+ blp = list_entry(iop->down_list.next, struct bilink, up_head);
+
+ if (blp_p != NULL)
+ *blp_p = blp;
+ return blp->diop;
+}
+
+static inline struct io *bilink_first_up(struct io *iop, struct bilink **blp_p)
+{
+ struct bilink *blp;
+
+ if (list_empty(&iop->up_list))
+ return NULL;
+ blp = list_entry(iop->up_list.next, struct bilink, down_head);
+
+ if (blp_p != NULL)
+ *blp_p = blp;
+ return blp->uiop;
+}
+
+typedef void (*bilink_func)(struct io *diop, struct io *uiop, void *param);
+static inline void bilink_for_each_down(bilink_func func, struct io *uiop,
+ void *param, int ul)
+{
+ struct bilink *blp;
+ struct list_head *p, *q;
+
+ list_for_each_safe(p, q, &uiop->down_list) {
+ blp = list_entry(p, struct bilink, up_head);
+ func(blp->diop, uiop, param);
+ if (ul)
+ biunlink(blp);
+ }
+}
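
[Editor's note: to close, a hedged usage sketch of the bilink helpers
above; handle_down() and the iop variables are hypothetical, purely to
show the call pattern:

    /* Hypothetical callback: what to do with each downward io before
     * it is unlinked (e.g. propagate the completion time). */
    static void handle_down(struct io *diop, struct io *uiop, void *param)
    {
            /* ... per-link work ... */
    }

    static void bilink_example(struct io *q_iop, struct io *a_iop)
    {
            bilink(q_iop, a_iop);   /* q_iop is "down", a_iop is "up" */

            ASSERT(bilink_first_down(a_iop, NULL) == q_iop);
            ASSERT(bilink_first_up(q_iop, NULL) == a_iop);

            /* ul != 0: each bilink is biunlink()ed (and freed) right
             * after the callback runs, emptying a_iop->down_list. */
            bilink_for_each_down(handle_down, a_iop, NULL, 1);
    }

This is how an 'A' (remap) iop can be tied to the io it remapped and
later torn down in one pass, per point 6 of the commit message.]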