/*
 * TODO:
 *	- Get rid of device list?
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/miscdevice.h>
#include <linux/cdev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/version.h>

/* local header (name assumed): b_user_cmd, B_TYPE_/B_REQ_ values, binject_ compat helpers */
#include "binject.h"
static LIST_HEAD(b_dev_list);
static DEFINE_SPINLOCK(b_dev_lock);
static DEFINE_IDR(b_minor_idr);
static struct kmem_cache *b_slab;
static struct class *b_class;
static int b_major;
struct b_dev {
	struct list_head device_list;
	struct list_head done_list;
	spinlock_t lock;
	unsigned int done_cmds;
	wait_queue_head_t wq_done;
	struct block_device *bdev;
	struct file *file;
	atomic_t in_flight;
	atomic_t ref;
	struct device *dev;
	int minor;
	struct rcu_head rcu_free;
};

struct b_cmd {
	struct list_head list;
	struct b_dev *bd;
	struct bio *bio;
	u64 issue_time;
	struct b_user_cmd cmd;
};
static const unsigned long uc_flag_map[__B_FLAG_NR] = {
	/* ... lower-bit B_REQ_* mappings elided ... */
	B_REQ_FAILFAST_TRANSPORT,
	/* ... remaining B_REQ_* mappings elided ... */
};
struct uc_map {
	int type;
	unsigned int data_transfer : 1;
	unsigned int todevice : 1;
	unsigned int map_zero : 1;
	unsigned long rw_flags;
};
static const struct uc_map uc_map[B_TYPE_NR] = {
	{
		.type = B_TYPE_READ,
		.data_transfer = 1,
		.todevice = 0,
		.rw_flags = 0,
	},
	{
		.type = B_TYPE_WRITE,
		.data_transfer = 1,
		.todevice = 1,
		.rw_flags = B_REQ_WRITE,
	},
	{
		.type = B_TYPE_DISCARD,
		.rw_flags = B_REQ_DISCARD | B_REQ_WRITE,
	},
	{
		.type = B_TYPE_READVOID,
		.data_transfer = 1,
		.map_zero = 1,
	},
	{
		.type = B_TYPE_WRITEZERO,
		.data_transfer = 1,
		.todevice = 1,
		.map_zero = 1,
		.rw_flags = B_REQ_WRITE,
	},
	{
		.type = B_TYPE_READBARRIER,
		.data_transfer = 1,
		.rw_flags = B_REQ_HARDBARRIER,
	},
	{
		.type = B_TYPE_WRITEBARRIER,
		.data_transfer = 1,
		.todevice = 1,
		.rw_flags = B_REQ_HARDBARRIER | B_REQ_FLUSH | B_REQ_WRITE,
	},
};
static void b_dev_complete_commands(struct b_dev *bd);
static void b_dev_remove_lookup(struct b_dev *bd)
{
	if (!list_empty(&bd->device_list)) {
		list_del_init(&bd->device_list);
		idr_remove(&b_minor_idr, bd->minor);
	}
}
static void bd_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct b_dev, rcu_free));
}
static void b_dev_put(struct b_dev *bd)
{
	if (!atomic_dec_and_test(&bd->ref))
		return;

	spin_lock(&b_dev_lock);
	b_dev_remove_lookup(bd);
	spin_unlock(&b_dev_lock);

	b_dev_complete_commands(bd);

	device_destroy(b_class, MKDEV(b_major, bd->minor));
	fput(bd->file);
	module_put(THIS_MODULE);

	call_rcu(&bd->rcu_free, bd_rcu_free);
}
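/*
 * The RCU-deferred free pairs with b_dev_lookup() below: a lookup runs
 * under rcu_read_lock() and only succeeds via atomic_inc_not_zero() on
 * bd->ref, so a device whose last reference just dropped can still be
 * found in the idr but never re-acquired.
 */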
static struct b_cmd *get_free_command(struct b_dev *bd)
{
	struct b_cmd *bc;

	bc = kmem_cache_alloc(b_slab, GFP_KERNEL);
	if (bc) {
		memset(bc, 0, sizeof(*bc));
		INIT_LIST_HEAD(&bc->list);
		bc->bd = bd;
		return bc;
	}

	return ERR_PTR(-ENOMEM);
}
static struct b_cmd *get_completed_command(struct b_dev *bd)
{
	struct b_cmd *bc = NULL;

	spin_lock_irq(&bd->lock);
	if (!list_empty(&bd->done_list)) {
		bc = list_entry(bd->done_list.next, struct b_cmd, list);
		list_del_init(&bc->list);
		bd->done_cmds--;
	}
	spin_unlock_irq(&bd->lock);

	return bc;
}
static struct b_cmd *get_done_command(struct b_dev *bd, int block)
{
	struct b_cmd *bc;
	int ret;

	do {
		bc = get_completed_command(bd);
		if (bc)
			break;
		if (!block) {
			bc = ERR_PTR(-EAGAIN);
			break;
		}
		ret = wait_event_interruptible(bd->wq_done, bd->done_cmds);
		if (ret) {
			bc = ERR_PTR(-ERESTARTSYS);
			break;
		}
	} while (1);

	return bc;
}
static void bc_put_bio_pages(struct bio *bio)
{
	struct bio_vec *bv;
	unsigned int i;

	/*
	 * Free the pages attached for zero-map commands; the shared zero
	 * page is never ours to free.
	 */
	__bio_for_each_segment(bv, bio, i, 0) {
		if (bv->bv_page != ZERO_PAGE(0))
			__free_page(bv->bv_page);
	}
}
static void complete_and_free_bio(struct b_cmd *bc)
{
	const struct uc_map *ucm = &uc_map[bc->cmd.type];

	if (ucm->data_transfer) {
		if (!ucm->map_zero)
			bio_unmap_user(bc->bio);
		else
			bc_put_bio_pages(bc->bio);
	}

	bio_put(bc->bio);
}
static void b_dev_complete_commands(struct b_dev *bd)
{
	struct b_cmd *bc;

	wait_event(bd->wq_done, !atomic_read(&bd->in_flight));

	while ((bc = get_completed_command(bd)) != NULL) {
		complete_and_free_bio(bc);
		kmem_cache_free(b_slab, bc);	/* nobody will read this completion */
	}
}
static int b_dev_validate_command(struct b_user_cmd *buc)
{
	int ret = -EINVAL;

	if (!binject_buc_check_magic(buc))
		return ret;

	switch (buc->type) {
	case B_TYPE_READ:
	case B_TYPE_WRITE:
	case B_TYPE_DISCARD:
	case B_TYPE_READVOID:
	case B_TYPE_WRITEZERO:
	case B_TYPE_READBARRIER:
	case B_TYPE_WRITEBARRIER:
		if (buc->len)
			ret = 0;
		break;
	default:
		break;
	}

	return ret;
}
static void b_cmd_endio(struct bio *bio, int error)
{
	struct b_cmd *bc = bio->bi_private;
	struct b_dev *bd = bc->bd;
	unsigned long flags;
	u64 now;

	now = ktime_to_ns(ktime_get());
	bc->cmd.nsec = now - bc->issue_time;
	bc->cmd.error = error;

	spin_lock_irqsave(&bd->lock, flags);
	list_add_tail(&bc->list, &bd->done_list);
	bd->done_cmds++;
	spin_unlock_irqrestore(&bd->lock, flags);

	atomic_dec(&bd->in_flight);

	wake_up(&bd->wq_done);
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
static int bio_cmd_endio(struct bio *bio, unsigned int bytes, int err)
{
	if (bio->bi_size)
		return 1;

	b_cmd_endio(bio, err);
	return 0;
}
#else
static void bio_cmd_endio(struct bio *bio, int err)
{
	b_cmd_endio(bio, err);
}
#endif
#define len_to_pages(len)	(((len) + PAGE_SIZE - 1) / PAGE_SIZE)
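/* e.g. with 4k pages: len_to_pages(1) == 1, len_to_pages(4097) == 2 */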
static int zero_map_bio(struct request_queue *q, struct bio *bio,
			const struct uc_map *ucm, unsigned int len)
{
	unsigned int i, nr_pages, this_len, ret, err;
	struct page *page;

	nr_pages = len_to_pages(len);
	for (i = 0; i < nr_pages; i++) {
		if (ucm->todevice) {
			/* writing zeroes: feed data from the shared zero page */
			page = ZERO_PAGE(0);
		} else {
			/* reading into the void: throwaway page, freed on completion */
			page = alloc_page(GFP_KERNEL);
			if (!page) {
				err = -ENOMEM;
				goto err_put;
			}
		}

		this_len = PAGE_SIZE;
		if (this_len > len)
			this_len = len;

		ret = bio_add_pc_page(q, bio, page, this_len, 0);
		if (ret < this_len) {
			if (page != ZERO_PAGE(0))
				__free_page(page);
			err = -ENOMEM;
			goto err_put;
		}

		len -= this_len;
	}

	return 0;
err_put:
	bc_put_bio_pages(bio);
	return err;
}
static void map_uc_to_bio_flags(struct bio *bio, struct b_user_cmd *uc)
{
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < 8 * sizeof(uc->flags); i++) {
		/* don't index past the flag map (bound check assumed) */
		if (i == __B_FLAG_NR)
			break;

		if (uc->flags & (1UL << i))
			bio->bi_rw |= uc_flag_map[i];

		mask = ~((1UL << i) - 1);
		if (!(mask & uc->flags))
			break;
	}
}
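/*
 * Illustration (sketch, not from the original source): if uc->flags has
 * only the bit whose uc_flag_map[] entry is B_REQ_FAILFAST_TRANSPORT set,
 * the loop ORs that value into bio->bi_rw and then exits at the next mask
 * check, since no higher user flag bits remain to translate.
 */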
static struct bio *map_uc_to_bio(struct b_dev *bd, struct b_user_cmd *uc)
{
	struct request_queue *q = bdev_get_queue(bd->bdev);
	const struct uc_map *ucm = &uc_map[uc->type];
	struct bio *bio;

	if (ucm->data_transfer && !ucm->map_zero) {
		bio = binject_map_bio(q, bd->bdev, uc->buf, uc->len,
					!ucm->todevice, GFP_KERNEL);
	} else {
		bio = bio_alloc(GFP_KERNEL, len_to_pages(uc->len));
		if (bio) {
			bio->bi_bdev = bd->bdev;
			if (ucm->map_zero && uc->len) {
				int err;

				err = zero_map_bio(q, bio, ucm, uc->len);
				if (err) {
					bio_put(bio);
					bio = ERR_PTR(err);
				}
			} else
				bio->bi_size = uc->len;
		}
	}

	if (!bio)
		bio = ERR_PTR(-ENOMEM);
	else if (!IS_ERR(bio)) {
		map_uc_to_bio_flags(bio, uc);
		bio->bi_sector = uc->offset / binject_get_bs(q);
		bio->bi_rw |= ucm->rw_flags;
	}

	return bio;
}
static int b_dev_add_command(struct b_dev *bd, struct b_cmd *bc)
{
	struct b_user_cmd *uc = &bc->cmd;
	struct bio *bio;

	bio = map_uc_to_bio(bd, uc);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bc->bio = bio;
	bio->bi_end_io = bio_cmd_endio;
	bio->bi_private = bc;

	bc->issue_time = ktime_to_ns(ktime_get());

	atomic_inc(&bd->in_flight);
	submit_bio(bio->bi_rw, bio);
	return 0;
}
static void b_dev_free_command(struct b_dev *bd, struct b_cmd *bc)
{
	kmem_cache_free(b_slab, bc);
}
/*
 * We are always writable, as we have an infinite queue depth
 */
static unsigned int b_dev_poll(struct file *file, poll_table *wait)
{
	struct b_dev *bd = file->private_data;
	unsigned int mask = POLLOUT;

	poll_wait(file, &bd->wq_done, wait);

	spin_lock_irq(&bd->lock);
	if (!list_empty(&bd->done_list))
		mask |= POLLIN | POLLRDNORM;
	spin_unlock_irq(&bd->lock);

	return mask;
}
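/*
 * Example (sketch): userspace can poll the per-device fd and only read
 * completions once POLLIN is signalled:
 *
 *	struct pollfd pfd = { .fd = bfd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		read(bfd, &cmd, sizeof(cmd));
 */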
static int b_dev_release(struct inode *inode, struct file *file)
{
	struct b_dev *bd = file->private_data;

	b_dev_put(bd);
	return 0;
}
static struct b_dev *b_dev_lookup(int minor)
{
	struct b_dev *bd;

	rcu_read_lock();
	bd = idr_find(&b_minor_idr, minor);
	if (bd && !atomic_inc_not_zero(&bd->ref))
		bd = NULL;
	rcu_read_unlock();

	return bd;
}
static int b_dev_open(struct inode *inode, struct file *file)
{
	struct b_dev *bd;

	bd = b_dev_lookup(iminor(inode));
	if (!bd)
		return -ENODEV;

	file->private_data = bd;
	return 0;
}
static ssize_t b_dev_write(struct file *file, const char __user *buf,
			   size_t count, loff_t *ppos)
{
	struct b_dev *bd = file->private_data;
	struct b_cmd *bc = NULL;
	unsigned int total;
	ssize_t done = 0;
	int err = 0;

	if (count % sizeof(struct b_user_cmd))
		return -EINVAL;

	total = count / sizeof(struct b_user_cmd);
	while (total) {
		bc = get_free_command(bd);
		if (IS_ERR(bc)) {
			err = PTR_ERR(bc);
			bc = NULL;
			break;
		}

		if (copy_from_user(&bc->cmd, buf, sizeof(struct b_user_cmd))) {
			err = -EFAULT;
			break;
		}

		err = b_dev_validate_command(&bc->cmd);
		if (err)
			break;

		err = b_dev_add_command(bd, bc);
		if (err)
			break;

		/* the completion path owns bc from here on */
		bc = NULL;

		done += sizeof(struct b_user_cmd);
		buf += sizeof(struct b_user_cmd);
		total--;
	}

	if (bc)
		b_dev_free_command(bd, bc);

	if (!done)
		done = err;

	return done;
}
static ssize_t b_dev_read(struct file *file, char __user *buf, size_t count,
			  loff_t *ppos)
{
	struct b_dev *bd = file->private_data;
	struct b_cmd *bc;
	unsigned int total;
	ssize_t done = 0;
	int err = 0;

	if (count % sizeof(struct b_user_cmd))
		return -EINVAL;

	total = count / sizeof(struct b_user_cmd);
	while (total) {
		bc = get_done_command(bd, !(file->f_flags & O_NONBLOCK));
		if (IS_ERR(bc)) {
			err = PTR_ERR(bc);
			break;
		}

		complete_and_free_bio(bc);

		if (copy_to_user(buf, &bc->cmd, sizeof(bc->cmd)))
			err = -EFAULT;

		b_dev_free_command(bd, bc);
		if (err)
			break;

		done += sizeof(struct b_user_cmd);
		buf += sizeof(struct b_user_cmd);
		total--;
	}

	if (!done)
		done = err;

	return done;
}
static const struct file_operations b_dev_fops = {
	.open		= b_dev_open,
	.release	= b_dev_release,
	.read		= b_dev_read,
	.write		= b_dev_write,
	.poll		= b_dev_poll,
	.owner		= THIS_MODULE,
};
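/*
 * Example (sketch): driving a device node from userspace. Assumes a node
 * /dev/binject0 created through the control device below; error handling
 * omitted. Commands are queued with write() and reaped with read(), in
 * multiples of sizeof(struct b_user_cmd).
 *
 *	struct b_user_cmd cmd;
 *	int fd = open("/dev/binject0", O_RDWR);
 *
 *	// fill in cmd: magic, type (e.g. B_TYPE_READ), offset, len, buf
 *	write(fd, &cmd, sizeof(cmd));	// queue one command
 *	read(fd, &cmd, sizeof(cmd));	// wait for its completion
 *	// cmd.error and cmd.nsec now hold the result and service time
 */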
static int b_del_dev(struct b_ioctl_cmd *bic)
{
	struct b_dev *bd;

	bd = b_dev_lookup(bic->minor);
	if (!bd)
		return -ENODEV;

	spin_lock(&b_dev_lock);
	b_dev_remove_lookup(bd);
	spin_unlock(&b_dev_lock);

	/*
	 * Our lookup grabbed a reference, drop two
	 */
	b_dev_put(bd);
	b_dev_put(bd);
	return 0;
}
static int b_add_dev(struct b_ioctl_cmd *bic)
{
	struct inode *inode;
	struct file *file;
	struct b_dev *bd;
	int ret;

	file = fget(bic->fd);
	if (!file)
		return -EBADF;

	__module_get(THIS_MODULE);

	inode = file->f_mapping->host;
	if (!S_ISBLK(inode->i_mode)) {
		ret = -EINVAL;
		goto out_put;
	}

	ret = idr_pre_get(&b_minor_idr, GFP_KERNEL);
	if (!ret) {
		ret = -ENOMEM;
		goto out_put;
	}

	bd = kzalloc(sizeof(*bd), GFP_KERNEL);
	if (!bd) {
		ret = -ENOMEM;
		goto out_put;
	}

	atomic_set(&bd->ref, 1);
	spin_lock_init(&bd->lock);
	INIT_LIST_HEAD(&bd->done_list);
	init_waitqueue_head(&bd->wq_done);
	bd->file = file;
	bd->bdev = inode->i_bdev;

	spin_lock(&b_dev_lock);

	ret = idr_get_new(&b_minor_idr, bd, &bd->minor);
	if (ret)
		goto out_unlock;

	if (bd->minor >= B_MAX_DEVS) {
		ret = -ENOSPC;
		goto out_idr;
	}

	spin_unlock(&b_dev_lock);

	INIT_LIST_HEAD(&bd->device_list);
	bd->dev = binject_device_create(b_class, NULL,
			MKDEV(b_major, bd->minor), bd, "binject%d", bd->minor);

	spin_lock(&b_dev_lock);
	if (IS_ERR(bd->dev)) {
		ret = PTR_ERR(bd->dev);
		goto out_idr;
	}

	list_add_tail(&bd->device_list, &b_dev_list);
	spin_unlock(&b_dev_lock);
	return 0;
out_idr:
	idr_remove(&b_minor_idr, bd->minor);
out_unlock:
	spin_unlock(&b_dev_lock);
	kfree(bd);
out_put:
	fput(file);
	module_put(THIS_MODULE);
	return ret;
}
static long b_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int __user *uarg = (int __user *) arg;
	struct b_ioctl_cmd bic;

	if (copy_from_user(&bic, uarg, sizeof(bic)))
		return -EFAULT;

	switch (cmd) {
	case 0:		/* add device (command values assumed) */
		return b_add_dev(&bic);
	case 1:		/* delete device */
		return b_del_dev(&bic);
	}

	return -ENOTTY;
}
static const struct file_operations b_misc_fops = {
	.unlocked_ioctl	= b_misc_ioctl,
	.owner		= THIS_MODULE,
};

static struct miscdevice b_misc_dev = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "binject-ctl",
	.fops	= &b_misc_fops,
};
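/*
 * Example (sketch): attaching a block device through the control node.
 * The ioctl command values are assumed to match the switch in
 * b_misc_ioctl() above (0 = add, 1 = del); error handling omitted.
 *
 *	struct b_ioctl_cmd bic = { .fd = open("/dev/sdb", O_RDONLY) };
 *	int ctl = open("/dev/binject-ctl", O_RDWR);
 *
 *	ioctl(ctl, 0, &bic);	// creates /dev/binject<minor>
 *	...
 *	ioctl(ctl, 1, &bic);	// detach, bic.minor selects the device
 */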
static void __exit b_exit(void)
{
	unregister_chrdev(b_major, "binject");
	kmem_cache_destroy(b_slab);
	class_destroy(b_class);
	misc_deregister(&b_misc_dev);
}
static int __init b_init(void)
{
	int ret;

	b_slab = binject_create_slab("binject", sizeof(struct b_cmd));
	if (!b_slab) {
		printk(KERN_ERR "binject: failed to create cmd slab\n");
		return -ENOMEM;
	}

	ret = misc_register(&b_misc_dev);
	if (ret < 0)
		goto err_slab;

	b_major = register_chrdev(0, "binject", &b_dev_fops);
	if (b_major < 0) {
		ret = b_major;
		goto err_misc;
	}

	b_class = class_create(THIS_MODULE, "binject");
	if (IS_ERR(b_class)) {
		ret = PTR_ERR(b_class);
		goto err_chrdev;
	}

	return 0;
err_chrdev:
	unregister_chrdev(b_major, "binject");
err_misc:
	misc_deregister(&b_misc_dev);
err_slab:
	kmem_cache_destroy(b_slab);
	return ret;
}
module_init(b_init);
module_exit(b_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");