diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -419,6 +419,14 @@ config LBD
          your machine, or if you want to have a raid or loopback device
          bigger than 2TB.  Otherwise say N.

+config BLK_DEV_IO_TRACE
+       bool "Support for tracing block io actions"
+       select RELAYFS_FS
+       help
+         Say Y here if you want to be able to trace the block layer actions
+         on a given queue.
+
+
 config CDROM_PKTCDVD
        tristate "Packet writing on CD/DVD media"
        depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -45,3 +45,5 @@ obj-$(CONFIG_VIODASD)         += viodasd.o
 obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
 obj-$(CONFIG_BLK_DEV_UB)       += ub.o

+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/blktrace.h>

 #include <asm/uaccess.h>

@@ -305,6 +306,8 @@ void elv_requeue_request(request_queue_t
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
 {
+       blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+
        /*
         * barriers implicitly indicate back insertion
         */
@@ -371,6 +374,9 @@ struct request *elv_next_request(request
        int ret;

        while ((rq = __elv_next_request(q)) != NULL) {
+
+               blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+
                /*
                 * just mark as started even if we don't start it, a request
                 * that has been delayed should not be passed by new incoming
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -4,6 +4,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
+#include <linux/blktrace.h>
 #include <asm/uaccess.h>

 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
@@ -188,6 +189,10 @@ static int blkdev_locked_ioctl(struct fi
                return put_ulong(arg, bdev->bd_inode->i_size >> 9);
        case BLKGETSIZE64:
                return put_u64(arg, bdev->bd_inode->i_size);
+       case BLKSTARTTRACE:
+               return blk_start_trace(bdev, (char __user *) arg);
+       case BLKSTOPTRACE:
+               return blk_stop_trace(bdev);
        }
        return -ENOIOCTLCMD;
 }
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/blktrace.h>

 /*
  * for max sense size
@@ -1422,8 +1423,10 @@ void blk_plug_device(request_queue_t *q)
        if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
                return;

-       if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+       if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
+               blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+       }
 }

 EXPORT_SYMBOL(blk_plug_device);
@@ -1487,14 +1490,21 @@ static void blk_backing_dev_unplug(struc
        /*
         * devices don't necessarily have an ->unplug_fn defined
         */
-       if (q->unplug_fn)
+       if (q->unplug_fn) {
+               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+                                       q->rq.count[READ] + q->rq.count[WRITE]);
+
                q->unplug_fn(q);
+       }
 }

 static void blk_unplug_work(void *data)
 {
        request_queue_t *q = data;

+       blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
+                               q->rq.count[READ] + q->rq.count[WRITE]);
+
        q->unplug_fn(q);
 }

@@ -1502,6 +1512,9 @@ static void blk_unplug_timeout(unsigned
 {
        request_queue_t *q = (request_queue_t *)data;

+       blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
+                               q->rq.count[READ] + q->rq.count[WRITE]);
+
        kblockd_schedule_work(&q->unplug_work);
 }

@@ -1624,6 +1637,11 @@ void blk_cleanup_queue(request_queue_t *
        if (q->queue_tags)
                __blk_queue_free_tags(q);

+       if (q->blk_trace) {
+               blk_cleanup_trace(q->blk_trace);
+               q->blk_trace = NULL;
+       }
+
        blk_queue_ordered(q, QUEUE_ORDERED_NONE);

        kmem_cache_free(requestq_cachep, q);
@@ -1970,6 +1988,8 @@ rq_starved:

        rq_init(q, rq);
        rq->rl = rl;
+
+       blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
        return rq;
 }
@@ -1998,6 +2018,8 @@ static struct request *get_request_wait(
                if (!rq) {
                        struct io_context *ioc;

+                       blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+
                        __generic_unplug_device(q);
                        spin_unlock_irq(q->queue_lock);
                        io_schedule();
@@ -2051,6 +2073,8 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(request_queue_t *q, struct request *rq)
 {
+       blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+
        if (blk_rq_tagged(rq))
                blk_queue_end_tag(q, rq);

@@ -2676,6 +2700,8 @@ static int __make_request(request_queue_
                        if (!q->back_merge_fn(q, req, bio))
                                break;

+                       blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+
                        req->biotail->bi_next = bio;
                        req->biotail = bio;
                        req->nr_sectors = req->hard_nr_sectors += nr_sectors;
@@ -2691,6 +2717,8 @@ static int __make_request(request_queue_
                        if (!q->front_merge_fn(q, req, bio))
                                break;

+                       blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+
                        bio->bi_next = req->bio;
                        req->bio = bio;

@@ -2991,6 +3019,8 @@ end_io:
                 */
                blk_partition_remap(bio);

+               blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+
                ret = q->make_request_fn(q, bio);
        } while (ret);
 }
@@ -3110,6 +3140,8 @@ static int __end_that_request_first(stru
        int total_bytes, bio_nbytes, error, next_idx = 0;
        struct bio *bio;

+       blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+
        /*
         * extend uptodate bool to allow < 0 value to be direct io error
         */
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <linux/blktrace.h>
 #include <scsi/sg.h>           /* for struct sg_iovec */

 #define BIO_POOL_SIZE 256
@@ -1050,6 +1051,9 @@ struct bio_pair *bio_split(struct bio *b
        if (!bp)
                return bp;

+       blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+                               bi->bi_sector + first_sectors);
+
        BUG_ON(bi->bi_vcnt != 1);
        BUG_ON(bi->bi_idx != 0);
        atomic_set(&bp->cnt, 3);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,6 +22,7 @@ typedef struct request_queue request_que
 struct elevator_queue;
 typedef struct elevator_queue elevator_t;
 struct request_pm_state;
+struct blk_trace;

 #define BLKDEV_MIN_RQ  4
 #define BLKDEV_MAX_RQ  128     /* Default maximum */
@@ -412,6 +413,8 @@ struct request_queue
         */
        struct request          *flush_rq;
        unsigned char           ordered;
+
+       struct blk_trace        *blk_trace;
 };

 enum {
diff --git a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -196,6 +196,8 @@ extern int dir_notify_enable;
 #define BLKBSZGET  _IOR(0x12,112,size_t)
 #define BLKBSZSET  _IOW(0x12,113,size_t)
 #define BLKGETSIZE64 _IOR(0x12,114,size_t)     /* return device size in bytes (u64 *arg) */
+#define BLKSTARTTRACE _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKSTOPTRACE _IO(0x12,116)

 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
 #define FIBMAP    _IO(0x00,1)  /* bmap access */
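
From user space the two new ioctls are driven roughly as follows. This is an illustrative sketch only, not part of the patch: it assumes the BLKSTARTTRACE/BLKSTOPTRACE numbers above and the struct blk_user_trace_setup layout from the new blktrace.h further down are visible to the application (a real tool would carry its own copies rather than include kernel headers), and that relayfs is mounted somewhere.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* BLKSTARTTRACE, BLKSTOPTRACE (with this patch) */
#include <linux/blktrace.h>     /* struct blk_user_trace_setup (with this patch) */

int main(int argc, char **argv)
{
        struct blk_user_trace_setup buts;
        int fd;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;

        memset(&buts, 0, sizeof(buts));
        buts.buf_size = 512 * 1024;     /* bytes per relay sub-buffer */
        buts.buf_nr = 4;                /* sub-buffers per cpu */
        buts.act_mask = 0;              /* 0 means trace all actions */

        if (ioctl(fd, BLKSTARTTRACE, &buts) < 0) {
                perror("BLKSTARTTRACE");
                return 1;
        }

        /*
         * The kernel filled in buts.name; per-cpu trace data now shows up
         * under <relayfs mount>/block/<buts.name>/trace0, trace1, ...
         */
        printf("tracing %s\n", buts.name);
        sleep(10);                      /* let some IO happen, then tear down */

        ioctl(fd, BLKSTOPTRACE);
        close(fd);
        return 0;
}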
diff --git a/mm/highmem.c b/mm/highmem.c
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
+#include <linux/blktrace.h>
 #include <asm/tlbflush.h>

 static mempool_t *page_pool, *isa_page_pool;
@@ -479,6 +480,8 @@ void blk_queue_bounce(request_queue_t *q
                pool = isa_page_pool;
        }

+       blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
        /*
         * slow path
         */
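
Taken together, the hooks added above mark the main points in the life of an IO. For orientation (this summary is not part of the patch), a normal file system request generates events roughly in this order:

  BLK_TA_QUEUE          bio enters the block layer (generic_make_request)
  BLK_TA_BOUNCE         bio had to be bounced for a highmem-incapable device
  BLK_TA_BACKMERGE/     bio was merged into an existing request, or
  BLK_TA_FRONTMERGE
  BLK_TA_GETRQ/         a new request was allocated for it, possibly after
  BLK_TA_SLEEPRQ        sleeping for one to become free
  BLK_TA_INSERT         request inserted into the elevator
  BLK_TA_PLUG/          queue plugged, later unplugged either by IO or by
  BLK_TA_UNPLUG_IO/     the unplug timer
  BLK_TA_UNPLUG_TIMER
  BLK_TA_ISSUE          request handed to the driver
  BLK_TA_REQUEUE        driver handed the request back (requeued)
  BLK_TA_COMPLETE       driver completed (part of) the request

BLK_TA_SPLIT is emitted from bio_split() whenever a bio is broken in two.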
--- /dev/null   2005-09-09 21:24:12.000000000 +0200
+++ linux-2.6/drivers/block/blktrace.c  2005-09-28 08:46:33.000000000 +0200
@@ -0,0 +1,232 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blktrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <asm/uaccess.h>
+
+static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
+
+void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
+                    int rw, u32 what, int error, int pdu_len, void *pdu_data)
+{
+       struct blk_io_trace t;
+       unsigned long flags;
+       int cpu;
+
+       if (rw & (1 << BIO_RW_BARRIER))
+               what |= BLK_TC_ACT(BLK_TC_BARRIER);
+       if (rw & (1 << BIO_RW_SYNC))
+               what |= BLK_TC_ACT(BLK_TC_SYNC);
+
+       if (rw & WRITE)
+               what |= BLK_TC_ACT(BLK_TC_WRITE);
+       else
+               what |= BLK_TC_ACT(BLK_TC_READ);
+
+       if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
+               return;
+
+       t.magic         = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+
+       t.device        = bt->dev;
+       t.sector        = sector;
+       t.bytes         = bytes;
+       t.action        = what;
+       t.error         = error;
+       t.pdu_len       = pdu_len;
+
+       t.pid           = current->pid;
+       memcpy(t.comm, current->comm, sizeof(t.comm));
+
+       /*
+        * need to serialize this part on the local processor to prevent
+        * interrupts from messing with the sequence <-> time relation
+        */
+       local_irq_save(flags);
+
+       t.sequence      = atomic_add_return(1, &bt->sequence);
+
+       cpu = smp_processor_id();
+       t.cpu           = cpu;
+       t.time          = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
+
+       __relay_write(bt->rchan, &t, sizeof(t));
+       if (pdu_len)
+               __relay_write(bt->rchan, pdu_data, pdu_len);
+
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(__blk_add_trace);
+
+static struct dentry *blk_tree_root;
+static DECLARE_MUTEX(blk_tree_mutex);
+
+static inline void blk_remove_root(void)
+{
+       if (relayfs_remove_dir(blk_tree_root) != -ENOTEMPTY)
+               blk_tree_root = NULL;
+}
+
+static void blk_remove_tree(struct dentry *dir)
+{
+       down(&blk_tree_mutex);
+       relayfs_remove_dir(dir);
+       blk_remove_root();
+       up(&blk_tree_mutex);
+}
+
+static struct dentry *blk_create_tree(const char *blk_name)
+{
+       struct dentry *dir = NULL;
+
+       down(&blk_tree_mutex);
+
+       if (!blk_tree_root) {
+               blk_tree_root = relayfs_create_dir("block", NULL);
+               if (!blk_tree_root)
+                       goto err;
+       }
+
+       dir = relayfs_create_dir(blk_name, blk_tree_root);
+       if (!dir)
+               blk_remove_root();
+
+err:
+       up(&blk_tree_mutex);
+       return dir;
+}
+
+void blk_cleanup_trace(struct blk_trace *bt)
+{
+       relay_close(bt->rchan);
+       blk_remove_tree(bt->dir);
+       kfree(bt);
+}
+
+int blk_stop_trace(struct block_device *bdev)
+{
+       request_queue_t *q = bdev_get_queue(bdev);
+       struct blk_trace *bt = NULL;
+       int ret = -EINVAL;
+
+       if (!q)
+               return -ENXIO;
+
+       down(&bdev->bd_sem);
+
+       if (q->blk_trace) {
+               bt = q->blk_trace;
+               q->blk_trace = NULL;
+               ret = 0;
+       }
+
+       up(&bdev->bd_sem);
+
+       if (bt)
+               blk_cleanup_trace(bt);
+
+       return ret;
+}
+
+int blk_start_trace(struct block_device *bdev, char __user *arg)
+{
+       request_queue_t *q = bdev_get_queue(bdev);
+       struct blk_user_trace_setup buts;
+       struct blk_trace *bt = NULL;
+       struct dentry *dir = NULL;
+       char b[BDEVNAME_SIZE];
+       int ret;
+
+       if (!q)
+               return -ENXIO;
+
+       if (copy_from_user(&buts, arg, sizeof(buts)))
+               return -EFAULT;
+
+       if (!buts.buf_size || !buts.buf_nr)
+               return -EINVAL;
+
+       strcpy(buts.name, bdevname(bdev, b));
+
+       if (copy_to_user(arg, &buts, sizeof(buts)))
+               return -EFAULT;
+
+       down(&bdev->bd_sem);
+       ret = -EBUSY;
+       if (q->blk_trace)
+               goto err;
+
+       ret = -ENOMEM;
+       bt = kmalloc(sizeof(*bt), GFP_KERNEL);
+       if (!bt)
+               goto err;
+
+       ret = -ENOENT;
+       dir = blk_create_tree(bdevname(bdev, b));
+       if (!dir)
+               goto err;
+
+       bt->dir = dir;
+       bt->dev = bdev->bd_dev;
+       atomic_set(&bt->sequence, 0);
+
+       ret = -EIO;
+       bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, NULL);
+       if (!bt->rchan)
+               goto err;
+
+       bt->act_mask = buts.act_mask;
+       if (!bt->act_mask)
+               bt->act_mask = (u16) -1;
+
+       q->blk_trace = bt;
+       up(&bdev->bd_sem);
+       return 0;
+err:
+       up(&bdev->bd_sem);
+       if (dir)
+               blk_remove_tree(dir);
+       if (bt)
+               kfree(bt);
+       return ret;
+}
+
+static void blk_trace_check_cpu_time(void *data)
+{
+       unsigned long long a, b, *t;
+       struct timeval tv;
+       int cpu = get_cpu();
+
+       t = &per_cpu(blk_trace_cpu_offset, cpu);
+
+       a = sched_clock();
+       do_gettimeofday(&tv);
+       b = sched_clock();
+
+       *t = (unsigned long long) tv.tv_sec * 1000000000 + tv.tv_usec * 1000;
+       *t -= (a + b) / 2;
+       put_cpu();
+}
+
+static int blk_trace_calibrate_offsets(void)
+{
+       unsigned long flags;
+
+       smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
+       local_irq_save(flags);
+       blk_trace_check_cpu_time(NULL);
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+static __init int blk_trace_init(void)
+{
+       return blk_trace_calibrate_offsets();
+}
+
+module_init(blk_trace_init);
+
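
Each record that __blk_add_trace() emits is a fixed-size struct blk_io_trace immediately followed by pdu_len bytes of payload (the command block for pc requests, a big-endian u64 for the pdu_int events). A user space consumer could walk one of the per-cpu relay files along these lines; again an illustrative sketch only, assuming the struct definition is available to the application and that the per-device directory created above exposes the files as trace0, trace1, ... (from the relay_open("trace", ...) call):

#include <stdio.h>
#include <linux/blktrace.h>     /* struct blk_io_trace, BLK_IO_TRACE_MAGIC */

int main(int argc, char **argv)
{
        struct blk_io_trace t;
        char pdu[256];
        FILE *f;

        if (argc < 2 || !(f = fopen(argv[1], "rb")))
                return 1;

        while (fread(&t, sizeof(t), 1, f) == 1) {
                if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
                        fprintf(stderr, "bad magic %x\n", t.magic);
                        break;
                }
                /* variable-size payload follows the fixed header */
                if (t.pdu_len > sizeof(pdu) ||
                    (t.pdu_len && fread(pdu, t.pdu_len, 1, f) != 1))
                        break;

                printf("%8u: cpu%u sector %llu + %u bytes, action 0x%08x\n",
                       t.sequence, t.cpu,
                       (unsigned long long) t.sector, t.bytes, t.action);
        }

        fclose(f);
        return 0;
}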
--- /dev/null   2005-09-09 21:24:12.000000000 +0200
+++ linux-2.6/include/linux/blktrace.h  2005-09-28 08:46:33.000000000 +0200
@@ -0,0 +1,180 @@
+#ifndef BLKTRACE_H
+#define BLKTRACE_H
+
+#include <linux/config.h>
+#include <linux/blkdev.h>
+#include <linux/relayfs_fs.h>
+
+/*
+ * Trace categories
+ */
+enum {
+       BLK_TC_READ     = 1 << 0,       /* reads */
+       BLK_TC_WRITE    = 1 << 1,       /* writes */
+       BLK_TC_BARRIER  = 1 << 2,       /* barrier */
+       BLK_TC_SYNC     = 1 << 3,       /* sync */
+       BLK_TC_QUEUE    = 1 << 4,       /* queueing/merging */
+       BLK_TC_REQUEUE  = 1 << 5,       /* requeueing */
+       BLK_TC_ISSUE    = 1 << 6,       /* issue */
+       BLK_TC_COMPLETE = 1 << 7,       /* completions */
+       BLK_TC_FS       = 1 << 8,       /* fs requests */
+       BLK_TC_PC       = 1 << 9,       /* pc requests */
+
+       BLK_TC_END      = 1 << 15,      /* reminder: only 16 bits */
+};
+
+#define BLK_TC_SHIFT           (16)
+#define BLK_TC_ACT(act)                ((act) << BLK_TC_SHIFT)
+
+/*
+ * Basic trace actions
+ */
+enum {
+       __BLK_TA_QUEUE = 1,             /* queued */
+       __BLK_TA_BACKMERGE,             /* back merged to existing rq */
+       __BLK_TA_FRONTMERGE,            /* front merge to existing rq */
+       __BLK_TA_GETRQ,                 /* allocated new request */
+       __BLK_TA_SLEEPRQ,               /* sleeping on rq allocation */
+       __BLK_TA_REQUEUE,               /* request requeued */
+       __BLK_TA_ISSUE,                 /* sent to driver */
+       __BLK_TA_COMPLETE,              /* completed by driver */
+       __BLK_TA_PLUG,                  /* queue was plugged */
+       __BLK_TA_UNPLUG_IO,             /* queue was unplugged by io */
+       __BLK_TA_UNPLUG_TIMER,          /* queue was unplugged by timer */
+       __BLK_TA_INSERT,                /* insert request */
+       __BLK_TA_SPLIT,                 /* bio was split */
+       __BLK_TA_BOUNCE,                /* bio was bounced */
+};
+
+/*
+ * Trace actions in full. Additionally, read or write is masked in
+ */
+#define BLK_TA_QUEUE           (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_BACKMERGE       (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_FRONTMERGE      (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_GETRQ           (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SLEEPRQ         (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_REQUEUE         (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
+#define BLK_TA_ISSUE           (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
+#define BLK_TA_COMPLETE                (__BLK_TA_COMPLETE | BLK_TC_ACT(BLK_TC_COMPLETE))
+#define BLK_TA_PLUG            (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_IO       (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_TIMER    (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_INSERT          (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SPLIT           (__BLK_TA_SPLIT)
+#define BLK_TA_BOUNCE          (__BLK_TA_BOUNCE)
+
+#define BLK_IO_TRACE_MAGIC     0x65617400
+#define BLK_IO_TRACE_VERSION   0x05
+
+/*
+ * The trace itself
+ */
+struct blk_io_trace {
+       u32 magic;              /* MAGIC << 8 | version */
+       u32 sequence;           /* event number */
+       u64 time;               /* in nanoseconds */
+       u64 sector;             /* disk offset */
+       u32 bytes;              /* transfer length */
+       u32 action;             /* what happened */
+       u32 pid;                /* who did it */
+       u32 cpu;                /* on what cpu did it happen */
+       u16 error;              /* completion error */
+       u16 pdu_len;            /* length of data after this trace */
+       u32 device;             /* device number */
+       char comm[16];          /* task command name (TASK_COMM_LEN) */
+};
+
+struct blk_trace {
+       struct dentry *dir;
+       struct rchan *rchan;
+       atomic_t sequence;
+       u32 dev;
+       u16 act_mask;
+};
+
+/*
+ * User setup structure passed with BLKSTARTTRACE
+ */
+struct blk_user_trace_setup {
+       char name[BDEVNAME_SIZE];       /* output */
+       u16 act_mask;                   /* input */
+       u32 buf_size;                   /* input */
+       u32 buf_nr;                     /* input */
+};
+
+#if defined(CONFIG_BLK_DEV_IO_TRACE)
+extern int blk_start_trace(struct block_device *, char __user *);
+extern int blk_stop_trace(struct block_device *);
+extern void blk_cleanup_trace(struct blk_trace *);
+extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
+
+static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+                                   u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+       int rw = rq->flags & 0x07;
+
+       if (likely(!bt))
+               return;
+
+       if (blk_pc_request(rq)) {
+               what |= BLK_TC_ACT(BLK_TC_PC);
+               __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
+       } else {
+               what |= BLK_TC_ACT(BLK_TC_FS);
+               __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
+       }
+}
+
+static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+                                    u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+
+       if (likely(!bt))
+               return;
+
+       __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static inline void blk_add_trace_generic(struct request_queue *q,
+                                        struct bio *bio, int rw, u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+
+       if (likely(!bt))
+               return;
+
+       if (bio)
+               blk_add_trace_bio(q, bio, what);
+       else
+               __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
+}
+
+static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
+                                        struct bio *bio, unsigned int pdu)
+{
+       struct blk_trace *bt = q->blk_trace;
+       u64 rpdu = cpu_to_be64(pdu);
+
+       if (likely(!bt))
+               return;
+
+       if (bio)
+               __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
+       else
+               __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+}
+
+#else /* !CONFIG_BLK_DEV_IO_TRACE */
+#define blk_start_trace(bdev, arg)             (-EINVAL)
+#define blk_stop_trace(bdev)                   (-EINVAL)
+#define blk_cleanup_trace(bt)                  do { } while (0)
+#define blk_add_trace_rq(q, rq, what)          do { } while (0)
+#define blk_add_trace_bio(q, rq, what)         do { } while (0)
+#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
+#define blk_add_trace_pdu_int(q, what, bio, pdu)       do { } while (0)
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#endif
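
Decoding on the consumer side follows directly from the definitions above: the low 16 bits of blk_io_trace->action hold the __BLK_TA_* code, while the bits from BLK_TC_SHIFT upward hold the BLK_TC_* category mask that __blk_add_trace() checks act_mask against. A hypothetical pair of helpers (the names are illustrative, not part of the patch):

#include <linux/blktrace.h>

static inline unsigned int trace_act(u32 action)
{
        return action & ((1 << BLK_TC_SHIFT) - 1);      /* __BLK_TA_* code */
}

static inline unsigned int trace_cat(u32 action)
{
        return action >> BLK_TC_SHIFT;                  /* BLK_TC_* category bits */
}

Since an event passes the act_mask filter when any of its category bits are selected, a setup that only records what the driver sees could use, for example:

        buts.act_mask = BLK_TC_ISSUE | BLK_TC_COMPLETE;

and the consumer could then pick out completed writes with:

        if (trace_act(t.action) == __BLK_TA_COMPLETE &&
            (trace_cat(t.action) & BLK_TC_WRITE))
                /* a write finished at the driver level */;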