[blktrace.git] / kernel / blk-trace-2.6.14-rc2-git-J0
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -419,6 +419,14 @@ config LBD
          your machine, or if you want to have a raid or loopback device
          bigger than 2TB.  Otherwise say N.
 
+config BLK_DEV_IO_TRACE
+       bool "Support for tracing block io actions"
+       select RELAYFS_FS
+       help
+         Say Y here if you want to be able to trace the block layer actions
+         on a given queue.
+
+
 config CDROM_PKTCDVD
        tristate "Packet writing on CD/DVD media"
        depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -45,3 +45,5 @@ obj-$(CONFIG_VIODASD)         += viodasd.o
 obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
 obj-$(CONFIG_BLK_DEV_UB)       += ub.o
 
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+
diff --git a/drivers/block/elevator.c b/drivers/block/elevator.c
--- a/drivers/block/elevator.c
+++ b/drivers/block/elevator.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/blktrace.h>
 
 #include <asm/uaccess.h>
 
@@ -305,6 +306,8 @@ void elv_requeue_request(request_queue_t
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
                       int plug)
 {
+       blk_add_trace_rq(q, rq, BLK_TA_INSERT);
+
        /*
         * barriers implicitly indicate back insertion
         */
@@ -371,6 +374,9 @@ struct request *elv_next_request(request
        int ret;
 
        while ((rq = __elv_next_request(q)) != NULL) {
+
+               blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
+
                /*
                 * just mark as started even if we don't start it, a request
                 * that has been delayed should not be passed by new incoming
diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c
--- a/drivers/block/ioctl.c
+++ b/drivers/block/ioctl.c
@@ -4,6 +4,7 @@
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
+#include <linux/blktrace.h>
 #include <asm/uaccess.h>
 
 static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
@@ -188,6 +189,10 @@ static int blkdev_locked_ioctl(struct fi
                return put_ulong(arg, bdev->bd_inode->i_size >> 9);
        case BLKGETSIZE64:
                return put_u64(arg, bdev->bd_inode->i_size);
+       case BLKSTARTTRACE:
+               return blk_start_trace(bdev, (char __user *) arg);
+       case BLKSTOPTRACE:
+               return blk_stop_trace(bdev);
        }
        return -ENOIOCTLCMD;
 }
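
For illustration only (not part of the patch): a userspace tool could drive the two new ioctls roughly as sketched below. The ioctl numbers and struct blk_user_trace_setup are copied from the fs.h and blktrace.h changes elsewhere in this patch, BDEVNAME_SIZE is assumed to match the kernel's value of 32, and the relayfs output location is an assumption based on blk_create_tree()/relay_open() further down.

/*
 * Hypothetical userspace sketch: start tracing a block device via the new
 * BLKSTARTTRACE ioctl, wait a bit, then stop it with BLKSTOPTRACE.  The
 * struct and ioctl numbers mirror the definitions this patch adds; they are
 * duplicated here only because no exported userspace header exists yet.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <linux/types.h>

#define BDEVNAME_SIZE	32	/* assumed to match the kernel's definition */

struct blk_user_trace_setup {
	char name[BDEVNAME_SIZE];	/* output: relayfs directory name */
	__u16 act_mask;			/* input: categories to trace, 0 == all */
	__u32 buf_size;			/* input: size of each relay sub-buffer */
	__u32 buf_nr;			/* input: number of sub-buffers */
};

#define BLKSTARTTRACE	_IOWR(0x12, 115, struct blk_user_trace_setup)
#define BLKSTOPTRACE	_IO(0x12, 116)

int main(void)
{
	struct blk_user_trace_setup buts;
	int fd = open("/dev/sda", O_RDONLY);	/* example device */

	if (fd < 0)
		return 1;

	memset(&buts, 0, sizeof(buts));
	buts.buf_size = 128 * 1024;	/* example relay buffer geometry */
	buts.buf_nr = 4;
	buts.act_mask = 0;		/* 0 means "trace everything" */

	if (ioctl(fd, BLKSTARTTRACE, &buts) < 0) {
		close(fd);
		return 1;
	}

	/* records now stream into relayfs under block/<name>/trace* */
	printf("tracing started on %s\n", buts.name);
	sleep(10);

	ioctl(fd, BLKSTOPTRACE);
	close(fd);
	return 0;
}

Note that the kernel fills in buts.name with the bdevname of the traced device, which is why BLKSTARTTRACE is declared _IOWR rather than _IOW.
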
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -29,6 +29,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
+#include <linux/blktrace.h>
 
 /*
  * for max sense size
@@ -1422,8 +1423,10 @@ void blk_plug_device(request_queue_t *q)
        if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
                return;
 
-       if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+       if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
+               blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
+       }
 }
 
 EXPORT_SYMBOL(blk_plug_device);
@@ -1487,8 +1490,12 @@ static void blk_backing_dev_unplug(struc
        /*
         * devices don't necessarily have an ->unplug_fn defined
         */
-       if (q->unplug_fn)
+       if (q->unplug_fn) {
+               int nrq = q->rq.count[READ] + q->rq.count[WRITE];
+
+               blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, nrq);
                q->unplug_fn(q);
+       }
 }
 
 static void blk_unplug_work(void *data)
@@ -1501,7 +1508,9 @@ static void blk_unplug_work(void *data)
 static void blk_unplug_timeout(unsigned long data)
 {
        request_queue_t *q = (request_queue_t *)data;
+       int nrq = q->rq.count[READ] + q->rq.count[WRITE];
 
+       blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, nrq);
        kblockd_schedule_work(&q->unplug_work);
 }
 
@@ -1624,6 +1633,11 @@ void blk_cleanup_queue(request_queue_t *
        if (q->queue_tags)
                __blk_queue_free_tags(q);
 
+       if (q->blk_trace) {
+               blk_cleanup_trace(q->blk_trace);
+               q->blk_trace = NULL;
+       }
+
        blk_queue_ordered(q, QUEUE_ORDERED_NONE);
 
        kmem_cache_free(requestq_cachep, q);
@@ -1970,6 +1984,8 @@ rq_starved:
 
        rq_init(q, rq);
        rq->rl = rl;
+
+       blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
        return rq;
 }
@@ -1998,6 +2014,8 @@ static struct request *get_request_wait(
                if (!rq) {
                        struct io_context *ioc;
 
+                       blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+
                        __generic_unplug_device(q);
                        spin_unlock_irq(q->queue_lock);
                        io_schedule();
@@ -2051,6 +2069,8 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_requeue_request(request_queue_t *q, struct request *rq)
 {
+       blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
+
        if (blk_rq_tagged(rq))
                blk_queue_end_tag(q, rq);
 
@@ -2676,6 +2696,8 @@ static int __make_request(request_queue_
                        if (!q->back_merge_fn(q, req, bio))
                                break;
 
+                       blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
+
                        req->biotail->bi_next = bio;
                        req->biotail = bio;
                        req->nr_sectors = req->hard_nr_sectors += nr_sectors;
@@ -2691,6 +2713,8 @@ static int __make_request(request_queue_
                        if (!q->front_merge_fn(q, req, bio))
                                break;
 
+                       blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
+
                        bio->bi_next = req->bio;
                        req->bio = bio;
 
@@ -2991,6 +3015,8 @@ end_io:
                 */
                blk_partition_remap(bio);
 
+               blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
+
                ret = q->make_request_fn(q, bio);
        } while (ret);
 }
@@ -3110,6 +3136,8 @@ static int __end_that_request_first(stru
        int total_bytes, bio_nbytes, error, next_idx = 0;
        struct bio *bio;
 
+       blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
+
        /*
         * extend uptodate bool to allow < 0 value to be direct io error
         */
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <linux/blktrace.h>
 #include <scsi/sg.h>           /* for struct sg_iovec */
 
 #define BIO_POOL_SIZE 256
@@ -1050,6 +1051,9 @@ struct bio_pair *bio_split(struct bio *b
        if (!bp)
                return bp;
 
+       blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi,
+                               bi->bi_sector + first_sectors);
+
        BUG_ON(bi->bi_vcnt != 1);
        BUG_ON(bi->bi_idx != 0);
        atomic_set(&bp->cnt, 3);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,6 +22,7 @@ typedef struct request_queue request_que
 struct elevator_queue;
 typedef struct elevator_queue elevator_t;
 struct request_pm_state;
+struct blk_trace;
 
 #define BLKDEV_MIN_RQ  4
 #define BLKDEV_MAX_RQ  128     /* Default maximum */
@@ -412,6 +413,8 @@ struct request_queue
         */
        struct request          *flush_rq;
        unsigned char           ordered;
+
+       struct blk_trace        *blk_trace;
 };
 
 enum {
diff --git a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -196,6 +196,8 @@ extern int dir_notify_enable;
 #define BLKBSZGET  _IOR(0x12,112,size_t)
 #define BLKBSZSET  _IOW(0x12,113,size_t)
 #define BLKGETSIZE64 _IOR(0x12,114,size_t)     /* return device size in bytes (u64 *arg) */
+#define BLKSTARTTRACE _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKSTOPTRACE _IO(0x12,116)
 
 #define BMAP_IOCTL 1           /* obsolete - kept for compatibility */
 #define FIBMAP    _IO(0x00,1)  /* bmap access */
diff --git a/mm/highmem.c b/mm/highmem.c
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
+#include <linux/blktrace.h>
 #include <asm/tlbflush.h>
 
 static mempool_t *page_pool, *isa_page_pool;
@@ -479,6 +480,8 @@ void blk_queue_bounce(request_queue_t *q
                pool = isa_page_pool;
        }
 
+       blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
        /*
         * slow path
         */
--- /dev/null   2005-09-09 21:24:12.000000000 +0200
+++ linux-2.6/drivers/block/blktrace.c  2005-09-22 16:46:20.000000000 +0200
@@ -0,0 +1,226 @@
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blktrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <asm/uaccess.h>
+
+static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
+
+void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
+                    int rw, u32 what, int error, int pdu_len, void *pdu_data)
+{
+       struct blk_io_trace t;
+       unsigned long flags;
+       int cpu;
+
+       if (rw & (1 << BIO_RW_BARRIER))
+               what |= BLK_TC_ACT(BLK_TC_BARRIER);
+       if (rw & (1 << BIO_RW_SYNC))
+               what |= BLK_TC_ACT(BLK_TC_SYNC);
+
+       if (rw & WRITE)
+               what |= BLK_TC_ACT(BLK_TC_WRITE);
+       else
+               what |= BLK_TC_ACT(BLK_TC_READ);
+
+       if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
+               return;
+
+       t.magic         = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+       t.sequence      = atomic_add_return(1, &bt->sequence);
+
+       cpu = get_cpu();
+       t.cpu           = cpu;
+       t.time          = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
+       put_cpu();
+
+       t.device        = bt->dev;
+       t.sector        = sector;
+       t.bytes         = bytes;
+       t.action        = what;
+       t.error         = error;
+       t.pdu_len       = pdu_len;
+
+       t.pid           = current->pid;
+       memcpy(t.comm, current->comm, sizeof(t.comm));
+
+       local_irq_save(flags);
+       __relay_write(bt->rchan, &t, sizeof(t));
+       if (pdu_len)
+               __relay_write(bt->rchan, pdu_data, pdu_len);
+       local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(__blk_add_trace);
+
+static struct dentry *blk_tree_root;
+static DECLARE_MUTEX(blk_tree_mutex);
+
+static inline void blk_remove_root(void)
+{
+       if (relayfs_remove_dir(blk_tree_root) != -ENOTEMPTY)
+               blk_tree_root = NULL;
+}
+
+static void blk_remove_tree(struct dentry *dir)
+{
+       down(&blk_tree_mutex);
+       relayfs_remove_dir(dir);
+       blk_remove_root();
+       up(&blk_tree_mutex);
+}
+
+static struct dentry *blk_create_tree(const char *blk_name)
+{
+       struct dentry *dir = NULL;
+
+       down(&blk_tree_mutex);
+
+       if (!blk_tree_root) {
+               blk_tree_root = relayfs_create_dir("block", NULL);
+               if (!blk_tree_root)
+                       goto err;
+       }
+
+       dir = relayfs_create_dir(blk_name, blk_tree_root);
+       if (!dir)
+               blk_remove_root();
+
+err:
+       up(&blk_tree_mutex);
+       return dir;
+}
+
+void blk_cleanup_trace(struct blk_trace *bt)
+{
+       relay_close(bt->rchan);
+       blk_remove_tree(bt->dir);
+       kfree(bt);
+}
+
+int blk_stop_trace(struct block_device *bdev)
+{
+       request_queue_t *q = bdev_get_queue(bdev);
+       struct blk_trace *bt = NULL;
+       int ret = -EINVAL;
+
+       if (!q)
+               return -ENXIO;
+
+       down(&bdev->bd_sem);
+
+       if (q->blk_trace) {
+               bt = q->blk_trace;
+               q->blk_trace = NULL;
+               ret = 0;
+       }
+
+       up(&bdev->bd_sem);
+
+       if (bt)
+               blk_cleanup_trace(bt);
+
+       return ret;
+}
+
+int blk_start_trace(struct block_device *bdev, char __user *arg)
+{
+       request_queue_t *q = bdev_get_queue(bdev);
+       struct blk_user_trace_setup buts;
+       struct blk_trace *bt = NULL;
+       struct dentry *dir = NULL;
+       char b[BDEVNAME_SIZE];
+       int ret;
+
+       if (!q)
+               return -ENXIO;
+
+       if (copy_from_user(&buts, arg, sizeof(buts)))
+               return -EFAULT;
+
+       if (!buts.buf_size || !buts.buf_nr)
+               return -EINVAL;
+
+       strcpy(buts.name, bdevname(bdev, b));
+
+       if (copy_to_user(arg, &buts, sizeof(buts)))
+               return -EFAULT;
+
+       down(&bdev->bd_sem);
+       ret = -EBUSY;
+       if (q->blk_trace)
+               goto err;
+
+       ret = -ENOMEM;
+       bt = kmalloc(sizeof(*bt), GFP_KERNEL);
+       if (!bt)
+               goto err;
+
+       ret = -ENOENT;
+       dir = blk_create_tree(bdevname(bdev, b));
+       if (!dir)
+               goto err;
+
+       bt->dir = dir;
+       bt->dev = bdev->bd_dev;
+       atomic_set(&bt->sequence, 0);
+
+       ret = -EIO;
+       bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, NULL);
+       if (!bt->rchan)
+               goto err;
+
+       bt->act_mask = buts.act_mask;
+       if (!bt->act_mask)
+               bt->act_mask = (u16) -1;
+
+       q->blk_trace = bt;
+       up(&bdev->bd_sem);
+       return 0;
+err:
+       up(&bdev->bd_sem);
+       if (dir)
+               blk_remove_tree(dir);
+       if (bt)
+               kfree(bt);
+       return ret;
+}
+
+static void blk_trace_check_cpu_time(void *data)
+{
+       unsigned long long a, b, *t;
+       struct timeval tv;
+       int cpu = get_cpu();
+
+       t = &per_cpu(blk_trace_cpu_offset, cpu);
+
+       a = sched_clock();
+       do_gettimeofday(&tv);
+       b = sched_clock();
+
+       *t = (unsigned long long) tv.tv_sec * 1000000000ULL + tv.tv_usec * 1000;
+       *t -= (a + b) / 2;
+       put_cpu();
+}
+
+static int blk_trace_calibrate_offsets(void)
+{
+       unsigned long flags;
+
+       smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
+       local_irq_save(flags);
+       blk_trace_check_cpu_time(NULL);
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+static __init int blk_trace_init(void)
+{
+       return blk_trace_calibrate_offsets();
+}
+
+module_init(blk_trace_init);
+
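
A side note on the act_mask test in __blk_add_trace() above: every event's 32-bit action word carries its categories in the upper 16 bits (see the BLK_TC_*/BLK_TA_* definitions in the header below), so an event is emitted only if at least one of its categories is set in bt->act_mask. A minimal standalone sketch of that check, using values copied from this patch:

/*
 * Illustrative only: mirrors the "((bt->act_mask << BLK_TC_SHIFT) & what) == 0"
 * filter from __blk_add_trace().  Constants are copied from blktrace.h below.
 */
#include <stdio.h>

#define BLK_TC_READ		(1 << 0)
#define BLK_TC_WRITE		(1 << 1)
#define BLK_TC_COMPLETE		(1 << 7)
#define BLK_TC_SHIFT		16
#define BLK_TC_ACT(act)		((act) << BLK_TC_SHIFT)

#define __BLK_TA_COMPLETE	8	/* position in the basic action enum */
#define BLK_TA_COMPLETE		(__BLK_TA_COMPLETE | BLK_TC_ACT(BLK_TC_COMPLETE))

/* same test as the kernel filter, inverted to mean "keep this event" */
static int event_passes(unsigned short act_mask, unsigned int what)
{
	return ((act_mask << BLK_TC_SHIFT) & what) != 0;
}

int main(void)
{
	/* a completed read: __blk_add_trace() ORs in the READ category */
	unsigned int what = BLK_TA_COMPLETE | BLK_TC_ACT(BLK_TC_READ);

	printf("%d\n", event_passes(BLK_TC_COMPLETE, what));	/* 1: completions selected */
	printf("%d\n", event_passes(BLK_TC_READ, what));	/* 1: reads selected */
	printf("%d\n", event_passes(BLK_TC_WRITE, what));	/* 0: writes only */
	return 0;
}
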
--- /dev/null   2005-09-09 21:24:12.000000000 +0200
+++ linux-2.6/include/linux/blktrace.h  2005-09-22 08:47:50.000000000 +0200
@@ -0,0 +1,180 @@
+#ifndef BLKTRACE_H
+#define BLKTRACE_H
+
+#include <linux/config.h>
+#include <linux/blkdev.h>
+#include <linux/relayfs_fs.h>
+
+/*
+ * Trace categories
+ */
+enum {
+       BLK_TC_READ     = 1 << 0,       /* reads */
+       BLK_TC_WRITE    = 1 << 1,       /* writes */
+       BLK_TC_BARRIER  = 1 << 2,       /* barrier */
+       BLK_TC_SYNC     = 1 << 3,       /* sync */
+       BLK_TC_QUEUE    = 1 << 4,       /* queueing/merging */
+       BLK_TC_REQUEUE  = 1 << 5,       /* requeueing */
+       BLK_TC_ISSUE    = 1 << 6,       /* issue */
+       BLK_TC_COMPLETE = 1 << 7,       /* completions */
+       BLK_TC_FS       = 1 << 8,       /* fs requests */
+       BLK_TC_PC       = 1 << 9,       /* pc requests */
+
+       BLK_TC_END      = 1 << 15,      /* only 16-bits, reminder */
+};
+
+#define BLK_TC_SHIFT           (16)
+#define BLK_TC_ACT(act)                ((act) << BLK_TC_SHIFT)
+
+/*
+ * Basic trace actions
+ */
+enum {
+       __BLK_TA_QUEUE = 1,             /* queued */
+       __BLK_TA_BACKMERGE,             /* back merged to existing rq */
+       __BLK_TA_FRONTMERGE,            /* front merge to existing rq */
+       __BLK_TA_GETRQ,                 /* allocated new request */
+       __BLK_TA_SLEEPRQ,               /* sleeping on rq allocation */
+       __BLK_TA_REQUEUE,               /* request requeued */
+       __BLK_TA_ISSUE,                 /* sent to driver */
+       __BLK_TA_COMPLETE,              /* completed by driver */
+       __BLK_TA_PLUG,                  /* queue was plugged */
+       __BLK_TA_UNPLUG_IO,             /* queue was unplugged by io */
+       __BLK_TA_UNPLUG_TIMER,          /* queue was unplugged by timer */
+       __BLK_TA_INSERT,                /* insert request */
+       __BLK_TA_SPLIT,                 /* bio was split */
+       __BLK_TA_BOUNCE,                /* bio was bounced */
+};
+
+/*
+ * Trace actions in full. Additionally, read or write is masked
+ */
+#define BLK_TA_QUEUE           (__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_BACKMERGE       (__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_FRONTMERGE      (__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_GETRQ           (__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SLEEPRQ         (__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_REQUEUE         (__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
+#define BLK_TA_ISSUE           (__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
+#define BLK_TA_COMPLETE                (__BLK_TA_COMPLETE | BLK_TC_ACT(BLK_TC_COMPLETE))
+#define BLK_TA_PLUG            (__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_IO       (__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_TIMER    (__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_INSERT          (__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SPLIT           (__BLK_TA_SPLIT)
+#define BLK_TA_BOUNCE          (__BLK_TA_BOUNCE)
+
+#define BLK_IO_TRACE_MAGIC     0x65617400
+#define BLK_IO_TRACE_VERSION   0x05
+
+/*
+ * The trace itself
+ */
+struct blk_io_trace {
+       u32 magic;              /* MAGIC << 8 | version */
+       u32 sequence;           /* event number */
+       u64 time;               /* in nanoseconds */
+       u64 sector;             /* disk offset */
+       u32 bytes;              /* transfer length */
+       u32 action;             /* what happened */
+       u32 pid;                /* who did it */
+       u32 cpu;                /* on what cpu did it happen */
+       u16 error;              /* completion error */
+       u16 pdu_len;            /* length of data after this trace */
+       u32 device;             /* device number */
+       char comm[16];          /* task command name (TASK_COMM_LEN) */
+};
+
+struct blk_trace {
+       struct dentry *dir;
+       struct rchan *rchan;
+       atomic_t sequence;
+       u32 dev;
+       u16 act_mask;
+};
+
+/*
+ * User setup structure passed with BLKSTARTTRACE
+ */
+struct blk_user_trace_setup {
+       char name[BDEVNAME_SIZE];       /* output */
+       u16 act_mask;                   /* input */
+       u32 buf_size;                   /* input */
+       u32 buf_nr;                     /* input */
+};
+
+#if defined(CONFIG_BLK_DEV_IO_TRACE)
+extern int blk_start_trace(struct block_device *, char __user *);
+extern int blk_stop_trace(struct block_device *);
+extern void blk_cleanup_trace(struct blk_trace *);
+extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
+
+static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
+                                   u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+       int rw = rq->flags & 0x07;
+
+       if (likely(!bt))
+               return;
+
+       if (blk_pc_request(rq)) {
+               what |= BLK_TC_ACT(BLK_TC_PC);
+               __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
+       } else {
+               what |= BLK_TC_ACT(BLK_TC_FS);
+               __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL);
+       }
+}
+
+static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
+                                    u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+
+       if (likely(!bt))
+               return;
+
+       __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
+}
+
+static inline void blk_add_trace_generic(struct request_queue *q,
+                                        struct bio *bio, int rw, u32 what)
+{
+       struct blk_trace *bt = q->blk_trace;
+
+       if (likely(!bt))
+               return;
+
+       if (bio)
+               blk_add_trace_bio(q, bio, what);
+       else
+               __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL);
+}
+
+static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what,
+                                        struct bio *bio, unsigned int pdu)
+{
+       struct blk_trace *bt = q->blk_trace;
+       u64 rpdu = cpu_to_be64(pdu);
+
+       if (likely(!bt))
+               return;
+
+       if (bio)
+               __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu);
+       else
+               __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu);
+}
+
+#else /* !CONFIG_BLK_DEV_IO_TRACE */
+#define blk_start_trace(bdev, arg)             (-EINVAL)
+#define blk_stop_trace(bdev)                   (-EINVAL)
+#define blk_cleanup_trace(bt)                  do { } while (0)
+#define blk_add_trace_rq(q, rq, what)          do { } while (0)
+#define blk_add_trace_bio(q, rq, what)         do { } while (0)
+#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
+#define blk_add_trace_pdu_int(q, what, bio, pdu)       do { } while (0)
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#endif
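
Finally, a rough sketch of how a consumer might decode one record read out of the relay channel. Each record is a struct blk_io_trace, optionally followed by pdu_len bytes of payload (the request command for PC requests, or a big-endian u64 for the split/unplug events). This is illustrative only: the struct mirrors the header above using userspace types, and reading the raw bytes out of the relayfs files is out of scope here.

/*
 * Illustrative decoder for a single struct blk_io_trace record.  The layout
 * and magic/version constants mirror blktrace.h above; how the bytes are
 * obtained from relayfs is not shown.
 */
#include <stdio.h>
#include <stdint.h>

#define BLK_IO_TRACE_MAGIC	0x65617400
#define BLK_IO_TRACE_VERSION	0x05
#define BLK_TC_SHIFT		16

struct blk_io_trace {
	uint32_t magic;		/* MAGIC << 8 | version */
	uint32_t sequence;	/* event number */
	uint64_t time;		/* sched_clock() based, in nanoseconds */
	uint64_t sector;	/* disk offset */
	uint32_t bytes;		/* transfer length */
	uint32_t action;	/* low 16 bits: action, high 16 bits: categories */
	uint32_t pid;		/* who did it */
	uint32_t cpu;		/* on what cpu did it happen */
	uint16_t error;		/* completion error */
	uint16_t pdu_len;	/* payload bytes that follow this record */
	uint32_t device;	/* device number */
	char comm[16];		/* task command name */
};

/* Returns the number of payload bytes to skip, or -1 on a bad record. */
static int decode_one(const struct blk_io_trace *t)
{
	if ((t->magic & 0xffffff00) != BLK_IO_TRACE_MAGIC)
		return -1;	/* not a trace record (or wrong endianness) */
	if ((t->magic & 0xff) != BLK_IO_TRACE_VERSION)
		return -1;	/* format version mismatch */

	printf("seq %u cpu %u %s[%u]: action 0x%04x categories 0x%04x, "
	       "%u bytes @ sector %llu, err %u\n",
	       t->sequence, t->cpu, t->comm, t->pid,
	       t->action & 0xffff, t->action >> BLK_TC_SHIFT,
	       t->bytes, (unsigned long long) t->sector, t->error);

	return t->pdu_len;
}

int main(void)
{
	/* fabricated example record: a completed 4k read */
	struct blk_io_trace t = {
		.magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION,
		.sequence = 1,
		.action = 0x00810008,	/* __BLK_TA_COMPLETE, categories COMPLETE|READ */
		.bytes = 4096,
		.sector = 123456,
		.comm = "example",
	};

	return decode_one(&t) < 0;
}
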