[blktrace.git] kernel/blk-trace-2.6.16-rc2-git-B2
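Usage note, not part of the diff that follows: the hunks below add four block device ioctls (BLKTRACESETUP, BLKTRACESTART, BLKTRACESTOP, BLKTRACETEARDOWN) and a relayfs channel per traced queue. As a quick orientation, this is a minimal userspace sketch of how they could be driven once the patch is applied. The ioctl numbers and the blk_user_trace_setup layout mirror the definitions added to include/linux/fs.h and include/linux/blktrace_api.h in this patch; the BDEVNAME_SIZE value of 32 and reliance on the _IO*() macros coming in via <sys/ioctl.h> are assumptions, and a real build would take the structure and ioctl numbers from the patched kernel headers instead of redefining them.

/* blktrace-setup-sketch.c: hypothetical example, see note above */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define BDEVNAME_SIZE 32   /* assumption: matches the kernel's value */

/* mirrors struct blk_user_trace_setup from this patch */
struct blk_user_trace_setup {
	char name[BDEVNAME_SIZE];   /* output: relayfs directory name */
	uint16_t act_mask;          /* input: 0 means "trace everything" */
	uint32_t buf_size;          /* input: bytes per relay sub-buffer */
	uint32_t buf_nr;            /* input: number of sub-buffers */
	uint64_t start_lba;
	uint64_t end_lba;
	uint32_t pid;
};

/* same numbers as the additions to include/linux/fs.h below */
#define BLKTRACESETUP    _IOWR(0x12, 115, struct blk_user_trace_setup)
#define BLKTRACESTART    _IO(0x12, 116)
#define BLKTRACESTOP     _IO(0x12, 117)
#define BLKTRACETEARDOWN _IO(0x12, 118)

int main(int argc, char **argv)
{
	struct blk_user_trace_setup buts;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <blockdev>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&buts, 0, sizeof(buts));
	buts.buf_size = 512 * 1024;   /* 512 KiB per sub-buffer */
	buts.buf_nr = 4;              /* four sub-buffers per cpu */

	if (ioctl(fd, BLKTRACESETUP, &buts) < 0) {
		perror("BLKTRACESETUP");
		return 1;
	}

	/* kernel fills in buts.name; relay files appear under block/<name>/ */
	printf("tracing %s, relayfs dir block/%s\n", argv[1], buts.name);

	if (ioctl(fd, BLKTRACESTART) < 0)
		perror("BLKTRACESTART");

	sleep(5);   /* a collector would read block/<name>/trace<cpu> here */

	ioctl(fd, BLKTRACESTOP);
	ioctl(fd, BLKTRACETEARDOWN);
	close(fd);
	return 0;
}

In practice the blktrace app from the git URL in the Kconfig help text does the setup and the per-cpu relay reads; the sketch only illustrates the ioctl sequence the hunks below implement.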
1diff --git a/block/Kconfig b/block/Kconfig
2index 377f6dd..27eaed9 100644
3--- a/block/Kconfig
4+++ b/block/Kconfig
5@@ -11,4 +11,15 @@ config LBD
6 your machine, or if you want to have a raid or loopback device
7 bigger than 2TB. Otherwise say N.
8
9+config BLK_DEV_IO_TRACE
10+ bool "Support for tracing block io actions"
11+ select RELAYFS_FS
12+ help
13+ Say Y here if you want to be able to trace the block layer actions
14+ on a given queue. Tracing allows you to see any traffic happening
15+ on a block device queue. For more information (and the user space
16+ support tools needed), fetch the blktrace app from:
17+
18+ git://brick.kernel.dk/data/git/blktrace.git
19+
20 source block/Kconfig.iosched
21diff --git a/block/Makefile b/block/Makefile
22index 7e4f93e..c05de0e 100644
23--- a/block/Makefile
24+++ b/block/Makefile
25@@ -8,3 +8,5 @@ obj-$(CONFIG_IOSCHED_NOOP) += noop-iosch
26 obj-$(CONFIG_IOSCHED_AS) += as-iosched.o
27 obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
28 obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
29+
30+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
31diff --git a/block/blktrace.c b/block/blktrace.c
32new file mode 100644
33index 0000000..3b03587
34--- /dev/null
35+++ b/block/blktrace.c
36@@ -0,0 +1,499 @@
37+#include <linux/config.h>
38+#include <linux/kernel.h>
39+#include <linux/blkdev.h>
40+#include <linux/blktrace_api.h>
41+#include <linux/percpu.h>
42+#include <linux/init.h>
43+#include <linux/mutex.h>
44+#include <asm/uaccess.h>
45+
46+static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
47+static unsigned int blktrace_seq = 1;
48+
49+static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
50+{
51+ struct blk_io_trace *t;
52+
53+ t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
54+ if (t) {
55+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
56+ t->device = bt->dev;
57+ t->action = BLK_TC_ACT(BLK_TC_NOTIFY);
58+ t->pid = tsk->pid;
59+ t->cpu = smp_processor_id();
60+ t->pdu_len = sizeof(tsk->comm);
61+ memcpy((void *) t + sizeof(*t), tsk->comm, t->pdu_len);
62+ tsk->btrace_seq = blktrace_seq;
63+ }
64+}
65+
66+static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
67+ pid_t pid)
68+{
69+ if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
70+ return 1;
71+ if (sector < bt->start_lba || sector > bt->end_lba)
72+ return 1;
73+ if (bt->pid && pid != bt->pid)
74+ return 1;
75+
76+ return 0;
77+}
78+
79+/*
80+ * Data direction bit lookup
81+ */
82+static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
83+
84+/*
85+ * Bio action bits of interest
86+ */
87+static u32 bio_act[3] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC) };
88+
89+/*
90+ * More could be added as needed, taking care to bump the subtracted shift
91+ * in the macros below so each new flag maps to the next bio_act[] index
92+ */
93+#define trace_barrier_bit(rw) \
94+ (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
95+#define trace_sync_bit(rw) \
96+ (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
97+
98+/*
99+ * The worker for the various blk_add_trace*() types. Fills out a
100+ * blk_io_trace structure and places it in a per-cpu subbuffer.
101+ */
102+void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
103+ int rw, u32 what, int error, int pdu_len, void *pdu_data)
104+{
105+ struct task_struct *tsk = current;
106+ struct blk_io_trace *t;
107+ unsigned long flags;
108+ unsigned long *sequence;
109+ unsigned long seq;
110+ u64 cpu_time;
111+ pid_t pid;
112+ int cpu;
113+
114+ if (unlikely(bt->trace_state != Blktrace_running))
115+ return;
116+
117+ what |= ddir_act[rw & WRITE];
118+ what |= bio_act[trace_barrier_bit(rw)];
119+ what |= bio_act[trace_sync_bit(rw)];
120+
121+ pid = tsk->pid;
122+ if (unlikely(act_log_check(bt, what, sector, pid)))
123+ return;
124+
125+ /*
126+ * A word about the locking here - we disable interrupts to reserve
127+ * some space in the relayfs per-cpu buffer, to prevent an irq
128+ * from coming in and stepping on our toes. Once reserved, it's
129+ * enough to get preemption disabled to prevent read of this data
130+ * before we are through filling it. get_cpu()/put_cpu() does this
131+ * for us
132+ */
133+ local_irq_save(flags);
134+
135+ if (unlikely(tsk->btrace_seq != blktrace_seq))
136+ trace_note_tsk(bt, tsk);
137+
138+ t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
139+ if (unlikely(!t)) {
140+ local_irq_restore(flags);
141+ return;
142+ }
143+
144+ cpu = get_cpu();
145+
146+ sequence = per_cpu_ptr(bt->sequence, cpu);
147+ seq = ++(*sequence);
148+ cpu_time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
149+
150+ local_irq_restore(flags);
151+
152+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
153+ t->sequence = seq;
154+ t->time = cpu_time;
155+ t->sector = sector;
156+ t->bytes = bytes;
157+ t->action = what;
158+ t->pid = pid;
159+ t->device = bt->dev;
160+ t->cpu = cpu;
161+ t->error = error;
162+ t->pdu_len = pdu_len;
163+
164+ if (pdu_len)
165+ memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
166+
167+ put_cpu();
168+}
169+
170+EXPORT_SYMBOL_GPL(__blk_add_trace);
171+
172+static struct dentry *blk_tree_root;
173+static struct mutex blk_tree_mutex;
174+
175+static inline void blk_remove_root(void)
176+{
177+ if (relayfs_remove_dir(blk_tree_root) != -ENOTEMPTY)
178+ blk_tree_root = NULL;
179+}
180+
181+static void blk_remove_tree(struct dentry *dir)
182+{
183+ mutex_lock(&blk_tree_mutex);
184+ relayfs_remove_dir(dir);
185+ blk_remove_root();
186+ mutex_unlock(&blk_tree_mutex);
187+}
188+
189+static struct dentry *blk_create_tree(const char *blk_name)
190+{
191+ struct dentry *dir = NULL;
192+
193+ mutex_lock(&blk_tree_mutex);
194+
195+ if (!blk_tree_root) {
196+ blk_tree_root = relayfs_create_dir("block", NULL);
197+ if (!blk_tree_root)
198+ goto err;
199+ }
200+
201+ dir = relayfs_create_dir(blk_name, blk_tree_root);
202+ if (!dir)
203+ blk_remove_root();
204+
205+err:
206+ mutex_unlock(&blk_tree_mutex);
207+ return dir;
208+}
209+
210+void blk_trace_cleanup(struct blk_trace *bt)
211+{
212+ relay_close(bt->rchan);
213+ relayfs_remove_file(bt->dropped_file);
214+ blk_remove_tree(bt->dir);
215+ free_percpu(bt->sequence);
216+ kfree(bt);
217+}
218+
219+static int blk_trace_remove(request_queue_t *q)
220+{
221+ struct blk_trace *bt;
222+
223+ bt = xchg(&q->blk_trace, NULL);
224+ if (!bt)
225+ return -EINVAL;
226+
227+ if (bt->trace_state == Blktrace_setup ||
228+ bt->trace_state == Blktrace_stopped)
229+ blk_trace_cleanup(bt);
230+
231+ return 0;
232+}
233+
234+static int blk_dropped_open(struct inode *inode, struct file *filp)
235+{
236+ filp->private_data = inode->u.generic_ip;
237+
238+ return 0;
239+}
240+
241+static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
242+ size_t count, loff_t *ppos)
243+{
244+ struct blk_trace *bt = filp->private_data;
245+ char buf[16];
246+
247+ snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
248+
249+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
250+}
251+
252+static struct file_operations blk_dropped_fops = {
253+ .owner = THIS_MODULE,
254+ .open = blk_dropped_open,
255+ .read = blk_dropped_read,
256+};
257+
258+/*
259+ * Keep track of how many times we encountered a full subbuffer, to aid
260+ * the user space app in telling how many lost events there were.
261+ */
262+static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
263+ void *prev_subbuf, size_t prev_padding)
264+{
265+ struct blk_trace *bt;
266+
267+ if (!relay_buf_full(buf))
268+ return 1;
269+
270+ bt = buf->chan->private_data;
271+ atomic_inc(&bt->dropped);
272+ return 0;
273+}
274+
275+static struct rchan_callbacks blk_relay_callbacks = {
276+ .subbuf_start = blk_subbuf_start_callback,
277+};
278+
279+/*
280+ * Setup everything required to start tracing
281+ */
282+static int blk_trace_setup(request_queue_t *q, struct block_device *bdev,
283+ char __user *arg)
284+{
285+ struct blk_user_trace_setup buts;
286+ struct blk_trace *old_bt, *bt = NULL;
287+ struct dentry *dir = NULL;
288+ char b[BDEVNAME_SIZE];
289+ int ret, i;
290+
291+ if (copy_from_user(&buts, arg, sizeof(buts)))
292+ return -EFAULT;
293+
294+ if (!buts.buf_size || !buts.buf_nr)
295+ return -EINVAL;
296+
297+ strcpy(buts.name, bdevname(bdev, b));
298+
299+ /*
300+ * some device names have larger paths - convert the slashes
301+ * to underscores for this to work as expected
302+ */
303+ for (i = 0; i < strlen(buts.name); i++)
304+ if (buts.name[i] == '/')
305+ buts.name[i] = '_';
306+
307+ if (copy_to_user(arg, &buts, sizeof(buts)))
308+ return -EFAULT;
309+
310+ ret = -ENOMEM;
311+ bt = kzalloc(sizeof(*bt), GFP_KERNEL);
312+ if (!bt)
313+ goto err;
314+
315+ bt->sequence = alloc_percpu(unsigned long);
316+ if (!bt->sequence)
317+ goto err;
318+
319+ ret = -ENOENT;
320+ dir = blk_create_tree(buts.name);
321+ if (!dir)
322+ goto err;
323+
324+ bt->dir = dir;
325+ bt->dev = bdev->bd_dev;
326+ atomic_set(&bt->dropped, 0);
327+
328+ ret = -EIO;
329+ bt->dropped_file = relayfs_create_file("dropped", dir, 0, &blk_dropped_fops, bt);
330+ if (!bt->dropped_file)
331+ goto err;
332+
333+ bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
334+ if (!bt->rchan)
335+ goto err;
336+ bt->rchan->private_data = bt;
337+
338+ bt->act_mask = buts.act_mask;
339+ if (!bt->act_mask)
340+ bt->act_mask = (u16) -1;
341+
342+ bt->start_lba = buts.start_lba;
343+ bt->end_lba = buts.end_lba;
344+ if (!bt->end_lba)
345+ bt->end_lba = -1ULL;
346+
347+ bt->pid = buts.pid;
348+ bt->trace_state = Blktrace_setup;
349+
350+ ret = -EBUSY;
351+ old_bt = xchg(&q->blk_trace, bt);
352+ if (old_bt) {
353+ xchg(&q->blk_trace, old_bt);
354+ goto err;
355+ }
356+
357+ return 0;
358+err:
359+ if (bt && bt->dropped_file)
360+ relayfs_remove_file(bt->dropped_file);
361+ if (dir)
362+ blk_remove_tree(dir);
363+ if (bt) {
364+ if (bt->sequence)
365+ free_percpu(bt->sequence);
366+ kfree(bt);
367+ }
368+ return ret;
369+}
370+
371+static int blk_trace_startstop(request_queue_t *q, int start)
372+{
373+ struct blk_trace *bt;
374+ int ret;
375+
376+ if ((bt = q->blk_trace) == NULL)
377+ return -EINVAL;
378+
379+ /*
380+ * For starting a trace, we can transition from a setup or stopped
381+ * trace. For stopping a trace, the state must be running
382+ */
383+ ret = -EINVAL;
384+ if (start) {
385+ if (bt->trace_state == Blktrace_setup ||
386+ bt->trace_state == Blktrace_stopped) {
387+ blktrace_seq++;
388+ smp_mb();
389+ bt->trace_state = Blktrace_running;
390+ ret = 0;
391+ }
392+ } else {
393+ if (bt->trace_state == Blktrace_running) {
394+ bt->trace_state = Blktrace_stopped;
395+ ret = 0;
396+ }
397+ }
398+
399+ return ret;
400+}
401+
402+/**
403+ * blk_trace_ioctl: - handle the ioctls associated with tracing
404+ * @bdev: the block device
405+ * @cmd: the ioctl cmd
406+ * @arg: the argument data, if any
407+ *
408+ **/
409+int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
410+{
411+ request_queue_t *q;
412+ int ret, start = 0;
413+
414+ q = bdev_get_queue(bdev);
415+ if (!q)
416+ return -ENXIO;
417+
418+ down(&bdev->bd_sem);
419+
420+ switch (cmd) {
421+ case BLKTRACESETUP:
422+ ret = blk_trace_setup(q, bdev, arg);
423+ break;
424+ case BLKTRACESTART:
425+ start = 1;
426+ case BLKTRACESTOP:
427+ ret = blk_trace_startstop(q, start);
428+ break;
429+ case BLKTRACETEARDOWN:
430+ ret = blk_trace_remove(q);
431+ break;
432+ default:
433+ ret = -ENOTTY;
434+ break;
435+ }
436+
437+ up(&bdev->bd_sem);
438+ return ret;
439+}
440+
441+/**
442+ * blk_trace_shutdown: - stop and cleanup trace structures
443+ * @q: the request queue associated with the device
444+ *
445+ **/
446+void blk_trace_shutdown(request_queue_t *q)
447+{
448+ blk_trace_startstop(q, 0);
449+ blk_trace_remove(q);
450+}
451+
452+/*
453+ * Average offset over two calls to sched_clock() with a gettimeofday()
454+ * in the middle
455+ */
456+static void blk_check_time(unsigned long long *t)
457+{
458+ unsigned long long a, b;
459+ struct timeval tv;
460+
461+ a = sched_clock();
462+ do_gettimeofday(&tv);
463+ b = sched_clock();
464+
465+ *t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000;
466+ *t -= (a + b) / 2;
467+}
468+
469+static void blk_trace_check_cpu_time(void *data)
470+{
471+ unsigned long long *t;
472+ int cpu = get_cpu();
473+
474+ t = &per_cpu(blk_trace_cpu_offset, cpu);
475+
476+ /*
477+ * Just call it twice, hopefully the second call will be cache hot
478+ * and a little more precise
479+ */
480+ blk_check_time(t);
481+ blk_check_time(t);
482+
483+ put_cpu();
484+}
485+
486+/*
487+ * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
488+ * timings
489+ */
490+static void blk_trace_calibrate_offsets(void)
491+{
492+ unsigned long flags;
493+
494+ smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
495+ local_irq_save(flags);
496+ blk_trace_check_cpu_time(NULL);
497+ local_irq_restore(flags);
498+}
499+
500+static void blk_trace_set_ht_offsets(void)
501+{
502+#if defined(CONFIG_SCHED_SMT)
503+ int cpu, i;
504+
505+ /*
506+ * now make sure HT siblings have the same time offset
507+ */
508+ preempt_disable();
509+ for_each_online_cpu(cpu) {
510+ unsigned long long *cpu_off, *sibling_off;
511+
512+ for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
513+ if (i == cpu)
514+ continue;
515+
516+ cpu_off = &per_cpu(blk_trace_cpu_offset, cpu);
517+ sibling_off = &per_cpu(blk_trace_cpu_offset, i);
518+ *sibling_off = *cpu_off;
519+ }
520+ }
521+ preempt_enable();
522+#endif
523+}
524+
525+static __init int blk_trace_init(void)
526+{
527+ mutex_init(&blk_tree_mutex);
528+ blk_trace_calibrate_offsets();
529+ blk_trace_set_ht_offsets();
530+
531+ return 0;
532+}
533+
534+module_init(blk_trace_init);
535+
536diff --git a/block/elevator.c b/block/elevator.c
537index 24b702d..0c9fafe 100644
538--- a/block/elevator.c
539+++ b/block/elevator.c
540@@ -33,6 +33,7 @@
541 #include <linux/init.h>
542 #include <linux/compiler.h>
543 #include <linux/delay.h>
544+#include <linux/blktrace_api.h>
545
546 #include <asm/uaccess.h>
547
548@@ -315,6 +316,8 @@ void elv_insert(request_queue_t *q, stru
549 struct list_head *pos;
550 unsigned ordseq;
551
552+ blk_add_trace_rq(q, rq, BLK_TA_INSERT);
553+
554 rq->q = q;
555
556 switch (where) {
557@@ -481,6 +484,7 @@ struct request *elv_next_request(request
558 * not be passed by new incoming requests
559 */
560 rq->flags |= REQ_STARTED;
561+ blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
562 }
563
564 if (!q->boundary_rq || q->boundary_rq == rq) {
565diff --git a/block/ioctl.c b/block/ioctl.c
566index e110949..7acb56c 100644
567--- a/block/ioctl.c
568+++ b/block/ioctl.c
569@@ -5,6 +5,7 @@
570 #include <linux/backing-dev.h>
571 #include <linux/buffer_head.h>
572 #include <linux/smp_lock.h>
573+#include <linux/blktrace_api.h>
574 #include <asm/uaccess.h>
575
576 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
577@@ -189,6 +190,11 @@ static int blkdev_locked_ioctl(struct fi
578 return put_ulong(arg, bdev->bd_inode->i_size >> 9);
579 case BLKGETSIZE64:
580 return put_u64(arg, bdev->bd_inode->i_size);
581+ case BLKTRACESTART:
582+ case BLKTRACESTOP:
583+ case BLKTRACESETUP:
584+ case BLKTRACETEARDOWN:
585+ return blk_trace_ioctl(bdev, cmd, (char __user *) arg);
586 }
587 return -ENOIOCTLCMD;
588 }
589diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
590index 03d9c82..41387f5 100644
591--- a/block/ll_rw_blk.c
592+++ b/block/ll_rw_blk.c
593@@ -28,6 +28,7 @@
594 #include <linux/writeback.h>
595 #include <linux/interrupt.h>
596 #include <linux/cpu.h>
597+#include <linux/blktrace_api.h>
598
599 /*
600 * for max sense size
601@@ -1551,8 +1552,10 @@ void blk_plug_device(request_queue_t *q)
602 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
603 return;
604
605- if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
606+ if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
607 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
608+ blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
609+ }
610 }
611
612 EXPORT_SYMBOL(blk_plug_device);
613@@ -1616,14 +1619,21 @@ static void blk_backing_dev_unplug(struc
614 /*
615 * devices don't necessarily have an ->unplug_fn defined
616 */
617- if (q->unplug_fn)
618+ if (q->unplug_fn) {
619+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
620+ q->rq.count[READ] + q->rq.count[WRITE]);
621+
622 q->unplug_fn(q);
623+ }
624 }
625
626 static void blk_unplug_work(void *data)
627 {
628 request_queue_t *q = data;
629
630+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
631+ q->rq.count[READ] + q->rq.count[WRITE]);
632+
633 q->unplug_fn(q);
634 }
635
636@@ -1631,6 +1641,9 @@ static void blk_unplug_timeout(unsigned
637 {
638 request_queue_t *q = (request_queue_t *)data;
639
640+ blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
641+ q->rq.count[READ] + q->rq.count[WRITE]);
642+
643 kblockd_schedule_work(&q->unplug_work);
644 }
645
646@@ -1753,6 +1766,9 @@ void blk_cleanup_queue(request_queue_t *
647 if (q->queue_tags)
648 __blk_queue_free_tags(q);
649
650+ if (q->blk_trace)
651+ blk_trace_shutdown(q);
652+
653 kmem_cache_free(requestq_cachep, q);
654 }
655
656@@ -2104,6 +2120,8 @@ rq_starved:
657
658 rq_init(q, rq);
659 rq->rl = rl;
660+
661+ blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
662 out:
663 return rq;
664 }
665@@ -2132,6 +2150,8 @@ static struct request *get_request_wait(
666 if (!rq) {
667 struct io_context *ioc;
668
669+ blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
670+
671 __generic_unplug_device(q);
672 spin_unlock_irq(q->queue_lock);
673 io_schedule();
674@@ -2185,6 +2205,8 @@ EXPORT_SYMBOL(blk_get_request);
675 */
676 void blk_requeue_request(request_queue_t *q, struct request *rq)
677 {
678+ blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
679+
680 if (blk_rq_tagged(rq))
681 blk_queue_end_tag(q, rq);
682
683@@ -2819,6 +2841,8 @@ static int __make_request(request_queue_
684 if (!q->back_merge_fn(q, req, bio))
685 break;
686
687+ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
688+
689 req->biotail->bi_next = bio;
690 req->biotail = bio;
691 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
692@@ -2834,6 +2858,8 @@ static int __make_request(request_queue_
693 if (!q->front_merge_fn(q, req, bio))
694 break;
695
696+ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
697+
698 bio->bi_next = req->bio;
699 req->bio = bio;
700
701@@ -2951,6 +2977,7 @@ void generic_make_request(struct bio *bi
702 request_queue_t *q;
703 sector_t maxsector;
704 int ret, nr_sectors = bio_sectors(bio);
705+ dev_t old_dev;
706
707 might_sleep();
708 /* Test device or partition size, when known. */
709@@ -2977,6 +3004,8 @@ void generic_make_request(struct bio *bi
710 * NOTE: we don't repeat the blk_size check for each new device.
711 * Stacking drivers are expected to know what they are doing.
712 */
713+ maxsector = -1;
714+ old_dev = 0;
715 do {
716 char b[BDEVNAME_SIZE];
717
718@@ -3009,6 +3038,15 @@ end_io:
719 */
720 blk_partition_remap(bio);
721
722+ if (maxsector != -1)
723+ blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
724+ maxsector);
725+
726+ blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
727+
728+ maxsector = bio->bi_sector;
729+ old_dev = bio->bi_bdev->bd_dev;
730+
731 ret = q->make_request_fn(q, bio);
732 } while (ret);
733 }
734@@ -3128,6 +3166,8 @@ static int __end_that_request_first(stru
735 int total_bytes, bio_nbytes, error, next_idx = 0;
736 struct bio *bio;
737
738+ blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
739+
740 /*
741 * extend uptodate bool to allow < 0 value to be direct io error
742 */
743diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
744index 0d65394..234048e 100644
745--- a/drivers/block/cciss.c
746+++ b/drivers/block/cciss.c
747@@ -38,6 +38,7 @@
748 #include <linux/hdreg.h>
749 #include <linux/spinlock.h>
750 #include <linux/compat.h>
751+#include <linux/blktrace_api.h>
752 #include <asm/uaccess.h>
753 #include <asm/io.h>
754
755@@ -2331,6 +2332,7 @@ static inline void complete_command( ctl
756
757 cmd->rq->completion_data = cmd;
758 cmd->rq->errors = status;
759+ blk_add_trace_rq(cmd->rq->q, cmd->rq, BLK_TA_COMPLETE);
760 blk_complete_request(cmd->rq);
761 }
762
763diff --git a/drivers/md/dm.c b/drivers/md/dm.c
764index e9adeb9..c8f3aa2 100644
765--- a/drivers/md/dm.c
766+++ b/drivers/md/dm.c
767@@ -17,6 +17,7 @@
768 #include <linux/mempool.h>
769 #include <linux/slab.h>
770 #include <linux/idr.h>
771+#include <linux/blktrace_api.h>
772
773 static const char *_name = DM_NAME;
774
775@@ -334,6 +335,8 @@ static void dec_pending(struct dm_io *io
776 /* nudge anyone waiting on suspend queue */
777 wake_up(&io->md->wait);
778
779+ blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE);
780+
781 bio_endio(io->bio, io->bio->bi_size, io->error);
782 free_io(io->md, io);
783 }
784@@ -392,6 +395,7 @@ static void __map_bio(struct dm_target *
785 struct target_io *tio)
786 {
787 int r;
788+ sector_t sector;
789
790 /*
791 * Sanity checks.
792@@ -407,10 +411,17 @@ static void __map_bio(struct dm_target *
793 * this io.
794 */
795 atomic_inc(&tio->io->io_count);
796+ sector = clone->bi_sector;
797 r = ti->type->map(ti, clone, &tio->info);
798- if (r > 0)
799+ if (r > 0) {
800 /* the bio has been remapped so dispatch it */
801+
802+ blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
803+ tio->io->bio->bi_bdev->bd_dev, sector,
804+ clone->bi_sector);
805+
806 generic_make_request(clone);
807+ }
808
809 else if (r < 0) {
810 /* error the io and bail out */
811diff --git a/fs/bio.c b/fs/bio.c
812index 1f3bb50..0dd0d81 100644
813--- a/fs/bio.c
814+++ b/fs/bio.c
815@@ -25,6 +25,7 @@
816 #include <linux/module.h>
817 #include <linux/mempool.h>
818 #include <linux/workqueue.h>
819+#include <linux/blktrace_api.h>
820 #include <scsi/sg.h> /* for struct sg_iovec */
821
822 #define BIO_POOL_SIZE 256
823@@ -1095,6 +1096,9 @@ struct bio_pair *bio_split(struct bio *b
824 if (!bp)
825 return bp;
826
827+ blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
828+ bi->bi_sector + first_sectors);
829+
830 BUG_ON(bi->bi_vcnt != 1);
831 BUG_ON(bi->bi_idx != 0);
832 atomic_set(&bp->cnt, 3);
833diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
834index 057e602..893d600 100644
835--- a/fs/compat_ioctl.c
836+++ b/fs/compat_ioctl.c
837@@ -72,6 +72,7 @@
838 #include <linux/i2c-dev.h>
839 #include <linux/wireless.h>
840 #include <linux/atalk.h>
841+#include <linux/blktrace_api.h>
842
843 #include <net/sock.h> /* siocdevprivate_ioctl */
844 #include <net/bluetooth/bluetooth.h>
845diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
846index 860e7a4..266ce9d 100644
847--- a/include/linux/blkdev.h
848+++ b/include/linux/blkdev.h
849@@ -22,6 +22,7 @@ typedef struct request_queue request_que
850 struct elevator_queue;
851 typedef struct elevator_queue elevator_t;
852 struct request_pm_state;
853+struct blk_trace;
854
855 #define BLKDEV_MIN_RQ 4
856 #define BLKDEV_MAX_RQ 128 /* Default maximum */
857@@ -416,6 +417,8 @@ struct request_queue
858 unsigned int sg_reserved_size;
859 int node;
860
861+ struct blk_trace *blk_trace;
862+
863 /*
864 * reserved for flush operations
865 */
866diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
867new file mode 100644
868index 0000000..fca40ef
869--- /dev/null
870+++ b/include/linux/blktrace_api.h
871@@ -0,0 +1,277 @@
872+#ifndef BLKTRACE_H
873+#define BLKTRACE_H
874+
875+#include <linux/config.h>
876+#include <linux/blkdev.h>
877+#include <linux/relayfs_fs.h>
878+
879+/*
880+ * Trace categories
881+ */
882+enum blktrace_cat {
883+ BLK_TC_READ = 1 << 0, /* reads */
884+ BLK_TC_WRITE = 1 << 1, /* writes */
885+ BLK_TC_BARRIER = 1 << 2, /* barrier */
886+ BLK_TC_SYNC = 1 << 3, /* sync */
887+ BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
888+ BLK_TC_REQUEUE = 1 << 5, /* requeueing */
889+ BLK_TC_ISSUE = 1 << 6, /* issue */
890+ BLK_TC_COMPLETE = 1 << 7, /* completions */
891+ BLK_TC_FS = 1 << 8, /* fs requests */
892+ BLK_TC_PC = 1 << 9, /* pc requests */
893+ BLK_TC_NOTIFY = 1 << 10, /* special message */
894+
895+ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
896+};
897+
898+#define BLK_TC_SHIFT (16)
899+#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
900+
901+/*
902+ * Basic trace actions
903+ */
904+enum blktrace_act {
905+ __BLK_TA_QUEUE = 1, /* queued */
906+ __BLK_TA_BACKMERGE, /* back merged to existing rq */
907+ __BLK_TA_FRONTMERGE, /* front merge to existing rq */
908+ __BLK_TA_GETRQ, /* allocated new request */
909+ __BLK_TA_SLEEPRQ, /* sleeping on rq allocation */
910+ __BLK_TA_REQUEUE, /* request requeued */
911+ __BLK_TA_ISSUE, /* sent to driver */
912+ __BLK_TA_COMPLETE, /* completed by driver */
913+ __BLK_TA_PLUG, /* queue was plugged */
914+ __BLK_TA_UNPLUG_IO, /* queue was unplugged by io */
915+ __BLK_TA_UNPLUG_TIMER, /* queue was unplugged by timer */
916+ __BLK_TA_INSERT, /* insert request */
917+ __BLK_TA_SPLIT, /* bio was split */
918+ __BLK_TA_BOUNCE, /* bio was bounced */
919+ __BLK_TA_REMAP, /* bio was remapped */
920+};
921+
922+/*
923+ * Trace actions in full. Additionally, read or write is masked
924+ */
925+#define BLK_TA_QUEUE (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
926+#define BLK_TA_BACKMERGE (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
927+#define BLK_TA_FRONTMERGE (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
928+#define BLK_TA_GETRQ (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
929+#define BLK_TA_SLEEPRQ (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
930+#define BLK_TA_REQUEUE (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
931+#define BLK_TA_ISSUE (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
932+#define BLK_TA_COMPLETE (__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
933+#define BLK_TA_PLUG (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
934+#define BLK_TA_UNPLUG_IO (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
935+#define BLK_TA_UNPLUG_TIMER (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
936+#define BLK_TA_INSERT (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
937+#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
938+#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
939+#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
940+
941+#define BLK_IO_TRACE_MAGIC 0x65617400
942+#define BLK_IO_TRACE_VERSION 0x07
943+
944+/*
945+ * The trace itself
946+ */
947+struct blk_io_trace {
948+ u32 magic; /* MAGIC << 8 | version */
949+ u32 sequence; /* event number */
950+ u64 time; /* in nanoseconds */
951+ u64 sector; /* disk offset */
952+ u32 bytes; /* transfer length */
953+ u32 action; /* what happened */
954+ u32 pid; /* who did it */
955+ u32 device; /* device number */
956+ u32 cpu; /* on what cpu did it happen */
957+ u16 error; /* completion error */
958+ u16 pdu_len; /* length of data after this trace */
959+};
960+
961+/*
962+ * The remap event
963+ */
964+struct blk_io_trace_remap {
965+ u32 device;
966+ u32 __pad;
967+ u64 sector;
968+};
969+
970+enum {
971+ Blktrace_setup = 1,
972+ Blktrace_running,
973+ Blktrace_stopped,
974+};
975+
976+struct blk_trace {
977+ int trace_state;
978+ struct dentry *dir;
979+ struct rchan *rchan;
980+ struct dentry *dropped_file;
981+ atomic_t dropped;
982+ unsigned long *sequence;
983+ u32 dev;
984+ u16 act_mask;
985+ u64 start_lba;
986+ u64 end_lba;
987+ u32 pid;
988+};
989+
990+/*
991+ * User setup structure passed with BLKTRACESETUP
992+ */
993+struct blk_user_trace_setup {
994+ char name[BDEVNAME_SIZE]; /* output */
995+ u16 act_mask; /* input */
996+ u32 buf_size; /* input */
997+ u32 buf_nr; /* input */
998+ u64 start_lba;
999+ u64 end_lba;
1000+ u32 pid;
1001+};
1002+
1003+#if defined(CONFIG_BLK_DEV_IO_TRACE)
1004+extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
1005+extern void blk_trace_shutdown(request_queue_t *);
1006+extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
1007+
1008+/**
1009+ * blk_add_trace_rq - Add a trace for a request oriented action
1010+ * @q: queue the io is for
1011+ * @rq: the source request
1012+ * @what: the action
1013+ *
1014+ * Description:
1015+ * Records an action against a request. Will log the bio offset + size.
1016+ *
1017+ **/
1018+static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
1019+ u32 what)
1020+{
1021+ struct blk_trace *bt = q->blk_trace;
1022+ int rw = rq->flags & 0x07;
1023+
1024+ if (likely(!bt))
1025+ return;
1026+
1027+ if (blk_pc_request(rq)) {
1028+ what |= BLK_TC_ACT(BLK_TC_PC);
1029+ __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
1030+ } else {
1031+ what |= BLK_TC_ACT(BLK_TC_FS);
1032+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
1033+ }
1034+}
1035+
1036+/**
1037+ * blk_add_trace_bio - Add a trace for a bio oriented action
1038+ * @q: queue the io is for
1039+ * @bio: the source bio
1040+ * @what: the action
1041+ *
1042+ * Description:
1043+ * Records an action against a bio. Will log the bio offset + size.
1044+ *
1045+ **/
1046+static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
1047+ u32 what)
1048+{
1049+ struct blk_trace *bt = q->blk_trace;
1050+
1051+ if (likely(!bt))
1052+ return;
1053+
1054+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
1055+}
1056+
1057+/**
1058+ * blk_add_trace_generic - Add a trace for a generic action
1059+ * @q: queue the io is for
1060+ * @bio: the source bio
1061+ * @rw: the data direction
1062+ * @what: the action
1063+ *
1064+ * Description:
1065+ * Records a simple trace
1066+ *
1067+ **/
1068+static inline void blk_add_trace_generic(struct request_queue *q,
1069+ struct bio *bio, int rw, u32 what)
1070+{
1071+ struct blk_trace *bt = q->blk_trace;
1072+
1073+ if (likely(!bt))
1074+ return;
1075+
1076+ if (bio)
1077+ blk_add_trace_bio(q, bio, what);
1078+ else
1079+ __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
1080+}
1081+
1082+/**
1083+ * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload
1084+ * @q: queue the io is for
1085+ * @what: the action
1086+ * @bio: the source bio
1087+ * @pdu: the integer payload
1088+ *
1089+ * Description:
1090+ * Adds a trace with some integer payload. This might be an unplug
1091+ * option given as the action, with the depth at unplug time given
1092+ * as the payload
1093+ *
1094+ **/
1095+static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
1096+ struct bio *bio, unsigned int pdu)
1097+{
1098+ struct blk_trace *bt = q->blk_trace;
1099+ u64 rpdu = cpu_to_be64(pdu);
1100+
1101+ if (likely(!bt))
1102+ return;
1103+
1104+ if (bio)
1105+ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
1106+ else
1107+ __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
1108+}
1109+
1110+/**
1111+ * blk_add_trace_remap - Add a trace for a remap operation
1112+ * @q: queue the io is for
1113+ * @bio: the source bio
1114+ * @dev: target device
1115+ * @from: source sector
1116+ * @to: target sector
1117+ *
1118+ * Description:
1119+ * A device mapper or raid target sometimes needs to split a bio because
1120+ * it spans a stripe (or similar). Add a trace for that action.
1121+ *
1122+ **/
1123+static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
1124+ dev_t dev, sector_t from, sector_t to)
1125+{
1126+ struct blk_trace *bt = q->blk_trace;
1127+ struct blk_io_trace_remap r;
1128+
1129+ if (likely(!bt))
1130+ return;
1131+
1132+ r.device = cpu_to_be32(dev);
1133+ r.sector = cpu_to_be64(to);
1134+
1135+ __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
1136+}
1137+
1138+#else /* !CONFIG_BLK_DEV_IO_TRACE */
1139+#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
1140+#define blk_trace_shutdown(q) do { } while (0)
1141+#define blk_add_trace_rq(q, rq, what) do { } while (0)
1142+#define blk_add_trace_bio(q, rq, what) do { } while (0)
1143+#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
1144+#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
1145+#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
1146+#endif /* CONFIG_BLK_DEV_IO_TRACE */
1147+
1148+#endif
1149diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
1150index 8fad50f..a3ed64b 100644
1151--- a/include/linux/compat_ioctl.h
1152+++ b/include/linux/compat_ioctl.h
1153@@ -97,6 +97,10 @@ COMPATIBLE_IOCTL(BLKRRPART)
1154 COMPATIBLE_IOCTL(BLKFLSBUF)
1155 COMPATIBLE_IOCTL(BLKSECTSET)
1156 COMPATIBLE_IOCTL(BLKSSZGET)
1157+COMPATIBLE_IOCTL(BLKTRACESTART)
1158+COMPATIBLE_IOCTL(BLKTRACESTOP)
1159+COMPATIBLE_IOCTL(BLKTRACESETUP)
1160+COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
1161 ULONG_IOCTL(BLKRASET)
1162 ULONG_IOCTL(BLKFRASET)
1163 /* RAID */
1164diff --git a/include/linux/fs.h b/include/linux/fs.h
1165index e059da9..c7a63cd 100644
1166--- a/include/linux/fs.h
1167+++ b/include/linux/fs.h
1168@@ -196,6 +196,10 @@ extern int dir_notify_enable;
1169 #define BLKBSZGET _IOR(0x12,112,size_t)
1170 #define BLKBSZSET _IOW(0x12,113,size_t)
1171 #define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
1172+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
1173+#define BLKTRACESTART _IO(0x12,116)
1174+#define BLKTRACESTOP _IO(0x12,117)
1175+#define BLKTRACETEARDOWN _IO(0x12,118)
1176
1177 #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
1178 #define FIBMAP _IO(0x00,1) /* bmap access */
1179diff --git a/include/linux/sched.h b/include/linux/sched.h
1180index 0cfcd1c..69cd119 100644
1181--- a/include/linux/sched.h
1182+++ b/include/linux/sched.h
1183@@ -708,6 +708,7 @@ struct task_struct {
1184 prio_array_t *array;
1185
1186 unsigned short ioprio;
1187+ unsigned int btrace_seq;
1188
1189 unsigned long sleep_avg;
1190 unsigned long long timestamp, last_ran;
1191diff --git a/kernel/fork.c b/kernel/fork.c
1192index 8e88b37..60f838f 100644
1193--- a/kernel/fork.c
1194+++ b/kernel/fork.c
1195@@ -179,6 +179,7 @@ static struct task_struct *dup_task_stru
1196 /* One for us, one for whoever does the "release_task()" (usually parent) */
1197 atomic_set(&tsk->usage,2);
1198 atomic_set(&tsk->fs_excl, 0);
1199+ tsk->btrace_seq = 0;
1200 return tsk;
1201 }
1202
1203diff --git a/mm/highmem.c b/mm/highmem.c
1204index ce2e7e8..d0ea1ee 100644
1205--- a/mm/highmem.c
1206+++ b/mm/highmem.c
1207@@ -26,6 +26,7 @@
1208 #include <linux/init.h>
1209 #include <linux/hash.h>
1210 #include <linux/highmem.h>
1211+#include <linux/blktrace_api.h>
1212 #include <asm/tlbflush.h>
1213
1214 static mempool_t *page_pool, *isa_page_pool;
1215@@ -483,6 +484,8 @@ void blk_queue_bounce(request_queue_t *q
1216 pool = isa_page_pool;
1217 }
1218
1219+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
1220+
1221 /*
1222 * slow path
1223 */