/*
 * TODO:
 *	- Get rid of device list?
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/miscdevice.h>
#include <linux/cdev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
static LIST_HEAD(b_dev_list);
static DEFINE_SPINLOCK(b_dev_lock);
static DEFINE_IDR(b_minor_idr);
static struct kmem_cache *b_slab;
static struct class *b_class;
	struct list_head done_list;
	struct list_head device_list;
	struct list_head reaped_done;
	wait_queue_head_t wq_done;
	struct block_device *bdev;
	struct b_dev_cpu __percpu *cpu_queue;
	struct rcu_head rcu_free;
	struct list_head list;
	struct b_user_cmd cmd;
static const unsigned long uc_flag_map[__B_FLAG_NR] = {
	B_REQ_FAILFAST_TRANSPORT,
	unsigned int data_transfer : 1;
	unsigned int todevice : 1;
	unsigned int map_zero : 1;
	unsigned long rw_flags;
static const struct uc_map uc_map[B_TYPE_NR] = {
		.rw_flags = B_REQ_WRITE,
		.type = B_TYPE_DISCARD,
		.rw_flags = B_REQ_DISCARD | B_REQ_WRITE,
		.type = B_TYPE_READVOID,
		.type = B_TYPE_WRITEZERO,
		.rw_flags = B_REQ_WRITE,
		.type = B_TYPE_READBARRIER,
		.rw_flags = B_REQ_HARDBARRIER,
		.type = B_TYPE_WRITEBARRIER,
		.rw_flags = B_REQ_HARDBARRIER | B_REQ_FLUSH | B_REQ_WRITE,
static void b_dev_complete_commands(struct b_dev *bd);
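/*
 * uc_map[] above is a dispatch table, indexed by the B_TYPE_* of an
 * incoming user command: it records whether the type moves data, in
 * which direction, whether its segments should be backed by zero pages,
 * and which B_REQ_* flags to OR into bio->bi_rw at submit time.
 */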
static void b_dev_remove_lookup(struct b_dev *bd)
	if (!list_empty(&bd->device_list)) {
		list_del_init(&bd->device_list);
		idr_remove(&b_minor_idr, bd->minor);
static void bd_rcu_free(struct rcu_head *head)
	struct b_dev *bd = container_of(head, struct b_dev, rcu_free);

	free_percpu(bd->cpu_queue);
static void b_dev_put(struct b_dev *bd)
	if (!atomic_dec_and_test(&bd->ref))

	spin_lock(&b_dev_lock);
	b_dev_remove_lookup(bd);
	spin_unlock(&b_dev_lock);

	b_dev_complete_commands(bd);

	device_destroy(b_class, MKDEV(b_major, bd->minor));
	module_put(THIS_MODULE);
	call_rcu(&bd->rcu_free, bd_rcu_free);
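/*
 * Teardown order in b_dev_put() matters: the device is first unhooked
 * from the lookup structures (list + IDR), outstanding commands are
 * drained, and only then is the backing memory released. The final free
 * goes through call_rcu() so that a lookup racing with removal
 * (idr_find() followed by atomic_inc_not_zero(), see b_dev_lookup()
 * below) cannot end up dereferencing already-freed memory.
 */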
static struct b_cmd *get_free_command(struct b_dev *bd)
	bc = kmem_cache_alloc(b_slab, GFP_KERNEL);

	return ERR_PTR(-ENOMEM);
static struct b_cmd *get_completed_command(struct b_dev *bd)
	struct b_cmd *bc = NULL;
	int cpu, spliced = 0;

	spin_lock(&bd->done_lock);
	if (!list_empty(&bd->reaped_done)) {
		bc = list_entry(bd->reaped_done.next, struct b_cmd, list);
		list_del_init(&bc->list);
	spin_unlock(&bd->done_lock);

	spin_lock(&bd->done_lock);
	for_each_possible_cpu(cpu) {
		struct b_dev_cpu *bdc = per_cpu_ptr(bd->cpu_queue, cpu);

		spin_lock_irq(&bdc->lock);
		if (!list_empty(&bdc->done_list)) {
			list_splice_init(&bdc->done_list, &bd->reaped_done);
		spin_unlock_irq(&bdc->lock);
	spin_unlock(&bd->done_lock);
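/*
 * Completions are collected in two stages: b_cmd_endio() appends each
 * finished command to the per-cpu done_list of whatever CPU the
 * completion ran on (cheap and irq-safe), and get_completed_command()
 * later splices all of the per-cpu lists into the single
 * bd->reaped_done list under done_lock before handing entries out one
 * at a time.
 */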
static int bd_pending_done(struct b_dev *bd)
	for_each_possible_cpu(cpu) {
		struct b_dev_cpu *bdc = per_cpu_ptr(bd->cpu_queue, cpu);

		if (!list_empty_careful(&bdc->done_list))
static struct b_cmd *get_done_command(struct b_dev *bd, int block)
	bc = get_completed_command(bd);

	ret = wait_event_interruptible(bd->wq_done, bd_pending_done(bd));
		bc = ERR_PTR(-ERESTARTSYS);
static void bc_put_bio_pages(struct bio *bio)
	__bio_for_each_segment(bv, bio, i, 0) {
		if (bv->bv_page != ZERO_PAGE(0))
			__free_page(bv->bv_page);
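/*
 * The ZERO_PAGE(0) check above matters: some command types point bio
 * segments at the shared kernel zero page, which must never be freed.
 * Only pages this module allocated itself (see zero_map_bio() below)
 * are handed back with __free_page().
 */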
static void complete_and_free_bio(struct b_cmd *bc)
	const struct uc_map *ucm = &uc_map[bc->cmd.type];

	if (ucm->data_transfer) {
		bio_unmap_user(bc->bio);
		bc_put_bio_pages(bc->bio);
static void b_dev_complete_commands(struct b_dev *bd)
	wait_event(bd->wq_done, !atomic_read(&bd->in_flight));

	while ((bc = get_completed_command(bd)) != NULL)
		complete_and_free_bio(bc);
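/*
 * Used on device teardown: wait until every submitted bio has signalled
 * completion (in_flight reaches zero), then reap and free whatever is
 * still sitting on the completion lists, so no command outlives the
 * device.
 */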
static int b_dev_validate_command(struct b_user_cmd *buc)
	if (!binject_buc_check_magic(buc))

	for (i = 0; i < B_TYPE_NR; i++) {
		const struct uc_map *ucm = &uc_map[i];

		if (ucm->type != buc->type)
		if (ucm->data_transfer && !buc->len)
static void b_cmd_endio(struct bio *bio, int error)
	struct b_cmd *bc = bio->bi_private;
	struct b_dev *bd = bc->bd;
	struct b_dev_cpu *bdc;

	now = ktime_to_ns(ktime_get());
	bc->cmd.nsec = now - bc->issue_time;
	bc->cmd.error = error;

	local_irq_save(flags);
	bdc = per_cpu_ptr(bd->cpu_queue, smp_processor_id());
	spin_lock(&bdc->lock);
	list_add_tail(&bc->list, &bdc->done_list);
	spin_unlock_irqrestore(&bdc->lock, flags);

	atomic_dec(&bd->in_flight);

	if (waitqueue_active(&bd->wq_done))
		wake_up(&bd->wq_done);
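/*
 * b_cmd_endio() runs from bio completion context, possibly in hard irq.
 * Interrupts are disabled before smp_processor_id() picks the per-cpu
 * queue, so the task cannot migrate between selecting bdc and taking
 * its lock; the per-cpu lists keep the completion hot path from
 * contending on a single global lock.
 */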
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
static int bio_cmd_endio(struct bio *bio, unsigned int bytes, int err)
	b_cmd_endio(bio, err);
#else
static void bio_cmd_endio(struct bio *bio, int err)
	b_cmd_endio(bio, err);
#define len_to_pages(len)	(((len) + PAGE_SIZE - 1) / PAGE_SIZE)
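/*
 * Rounds a byte count up to whole pages, e.g. with 4K pages:
 * len_to_pages(1) == 1, len_to_pages(4096) == 1, len_to_pages(4097) == 2.
 */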
static int zero_map_bio(struct request_queue *q, struct bio *bio,
			const struct uc_map *ucm, unsigned int len)
	unsigned int i, nr_pages, this_len, ret, err;

	nr_pages = len_to_pages(len);
	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(GFP_KERNEL);

		this_len = PAGE_SIZE;

		ret = bio_add_pc_page(q, bio, page, this_len, 0);
		if (ret < this_len) {

	bc_put_bio_pages(bio);
static void map_uc_to_bio_flags(struct bio *bio, struct b_user_cmd *uc)
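	/*
	 * Walk the user-supplied flag bits from low to high, translating
	 * each set bit to its kernel B_REQ_* counterpart via
	 * uc_flag_map[]. The mask test lets the loop stop early once no
	 * higher flag bits remain set.
	 */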
	for (i = 0; i < 8 * sizeof(uc->flags); i++) {
		if (uc->flags & (1UL << i))
			bio->bi_rw |= uc_flag_map[i];

		mask = ~((1UL << i) - 1);
		if (!(mask & uc->flags))
static struct bio *map_uc_to_bio(struct b_dev *bd, struct b_user_cmd *uc)
	struct request_queue *q = bdev_get_queue(bd->bdev);
	const struct uc_map *ucm = &uc_map[uc->type];

	if (ucm->data_transfer && !ucm->map_zero) {
		bio = binject_map_bio(q, bd->bdev, uc->buf, uc->len,
					!ucm->todevice, GFP_KERNEL);
		bio = bio_alloc(GFP_KERNEL, len_to_pages(uc->len));

		bio->bi_bdev = bd->bdev;
		if (ucm->map_zero && uc->len) {
			err = zero_map_bio(q, bio, ucm, uc->len);

			bio->bi_size = uc->len;

		bio = ERR_PTR(-ENOMEM);
	else if (!IS_ERR(bio)) {
		map_uc_to_bio_flags(bio, uc);
		bio->bi_sector = uc->offset / binject_get_bs(q);
		bio->bi_rw |= ucm->rw_flags;
static int b_dev_add_command(struct b_dev *bd, struct b_cmd *bc)
	struct b_user_cmd *uc = &bc->cmd;

	bio = map_uc_to_bio(bd, uc);

	bio->bi_end_io = bio_cmd_endio;
	bio->bi_private = bc;

	bc->issue_time = ktime_to_ns(ktime_get());

	atomic_inc(&bd->in_flight);
	submit_bio(bio->bi_rw, bio);
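/*
 * Submission is fully asynchronous: in_flight is bumped before
 * submit_bio() so a completion that fires immediately still sees a
 * consistent count, and the caller learns the outcome later by reading
 * the command back once b_cmd_endio() has queued it.
 */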
static void b_dev_free_command(struct b_dev *bd, struct b_cmd *bc)
	BUG_ON(!list_empty(&bc->list));
	kmem_cache_free(b_slab, bc);
/*
 * We are always writable, as we have an infinite queue depth
 */
static unsigned int b_dev_poll(struct file *file, poll_table *wait)
	struct b_dev *bd = file->private_data;
	unsigned int mask = POLLOUT;

	poll_wait(file, &bd->wq_done, wait);
	if (bd_pending_done(bd))
		mask |= POLLIN | POLLRDNORM;
static int b_dev_release(struct inode *inode, struct file *file)
	struct b_dev *bd = file->private_data;
static struct b_dev *b_dev_lookup(int minor)
	bd = idr_find(&b_minor_idr, minor);
	if (bd && !atomic_inc_not_zero(&bd->ref))
static int b_dev_open(struct inode *inode, struct file *file)
	bd = b_dev_lookup(iminor(inode));

	file->private_data = bd;
static ssize_t b_dev_write(struct file *file, const char __user *buf,
			   size_t count, loff_t *ppos)
	struct b_dev *bd = file->private_data;
	struct b_cmd *bc = NULL;

	if (count % sizeof(struct b_user_cmd))

	total = count / sizeof(struct b_user_cmd);

		bc = get_free_command(bd);

		if (copy_from_user(&bc->cmd, buf, sizeof(struct b_user_cmd))) {

		err = b_dev_validate_command(&bc->cmd);

		err = b_dev_add_command(bd, bc);

		done += sizeof(struct b_user_cmd);
		buf += sizeof(struct b_user_cmd);

	b_dev_free_command(bd, bc);
static ssize_t b_dev_read(struct file *file, char __user *buf, size_t count,
			  loff_t *ppos)
	struct b_dev *bd = file->private_data;

	if (count % sizeof(struct b_user_cmd))

	total = count / sizeof(struct b_user_cmd);

		bc = get_done_command(bd, !(file->f_flags & O_NONBLOCK));

		complete_and_free_bio(bc);

		if (copy_to_user(buf, &bc->cmd, sizeof(bc->cmd)))

		b_dev_free_command(bd, bc);

		done += sizeof(struct b_user_cmd);
		buf += sizeof(struct b_user_cmd);
static const struct file_operations b_dev_fops = {
	.release	= b_dev_release,
	.write		= b_dev_write,
	.owner		= THIS_MODULE,
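/*
 * Sketch of the userspace side of the per-device node (hypothetical
 * example, not part of this module; field and macro names beyond those
 * used above are assumptions from binject.h):
 *
 *	int fd = open("/dev/binject0", O_RDWR);
 *	struct b_user_cmd uc = { 0 };
 *	uc.type = B_TYPE_WRITEZERO;	// command type, see uc_map[]
 *	uc.offset = 0;			// byte offset on the device
 *	uc.len = 4096;			// transfer length in bytes
 *	write(fd, &uc, sizeof(uc));	// submit; size must be a multiple of sizeof(uc)
 *	read(fd, &uc, sizeof(uc));	// reap; blocks unless O_NONBLOCK is set
 *	// uc.error and uc.nsec now hold completion status and latency
 */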
static int b_del_dev(struct b_ioctl_cmd *bic)
	bd = b_dev_lookup(bic->minor);

	spin_lock(&b_dev_lock);
	b_dev_remove_lookup(bd);
	spin_unlock(&b_dev_lock);
	/*
	 * Our lookup grabbed a reference, drop two
	 */
static int b_add_dev(struct b_ioctl_cmd *bic)
	file = fget(bic->fd);

	__module_get(THIS_MODULE);

	inode = file->f_mapping->host;
	if (!S_ISBLK(inode->i_mode)) {

	ret = idr_pre_get(&b_minor_idr, GFP_KERNEL);

	bd = kzalloc(sizeof(*bd), GFP_KERNEL);

	bd->cpu_queue = alloc_percpu(struct b_dev_cpu);
	if (!bd->cpu_queue) {

	for_each_possible_cpu(cpu) {
		struct b_dev_cpu *bdc;

		bdc = per_cpu_ptr(bd->cpu_queue, cpu);
		INIT_LIST_HEAD(&bdc->done_list);
		spin_lock_init(&bdc->lock);

	atomic_set(&bd->ref, 1);
	spin_lock_init(&bd->done_lock);
	INIT_LIST_HEAD(&bd->reaped_done);
	init_waitqueue_head(&bd->wq_done);

	bd->bdev = inode->i_bdev;

	spin_lock(&b_dev_lock);
	ret = idr_get_new(&b_minor_idr, bd, &bd->minor);
	if (bd->minor >= B_MAX_DEVS)
	spin_unlock(&b_dev_lock);

	INIT_LIST_HEAD(&bd->device_list);
	bd->dev = binject_device_create(b_class, NULL,
			MKDEV(b_major, bd->minor), bd, "binject%d", bd->minor);

	spin_lock(&b_dev_lock);
	list_add_tail(&bd->device_list, &b_dev_list);
	bic->minor = bd->minor;
	spin_unlock(&b_dev_lock);
	idr_remove(&b_minor_idr, bd->minor);
	spin_unlock(&b_dev_lock);
	free_percpu(bd->cpu_queue);
	module_put(THIS_MODULE);
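/*
 * Minor numbers come from the IDR: idr_pre_get()/idr_get_new() is the
 * old-style two-step allocation API (preload outside the lock, then
 * allocate under b_dev_lock), and the result is rejected if it reaches
 * B_MAX_DEVS. The same lock serializes the device list and the minor
 * handed back to userspace in bic->minor.
 */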
static long b_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
	int __user *uarg = (int __user *) arg;
	struct b_ioctl_cmd bic;

	if (copy_from_user(&bic, uarg, sizeof(bic)))

		ret = b_add_dev(&bic);
		if (!ret && copy_to_user(uarg, &bic, sizeof(bic))) {

		ret = b_del_dev(&bic);
static const struct file_operations b_misc_fops = {
	.unlocked_ioctl	= b_misc_ioctl,
	.owner		= THIS_MODULE,
static struct miscdevice b_misc_dev = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "binject-ctl",
	.fops	= &b_misc_fops,
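/*
 * Devices are created and torn down through the control node rather
 * than through the per-device nodes. A hypothetical userspace sketch
 * (the ioctl request macros live in binject.h and are assumptions
 * here, not confirmed by this file):
 *
 *	int cfd = open("/dev/binject-ctl", O_RDWR);
 *	int bfd = open("/dev/sdb", O_RDONLY);	// any block device
 *	struct b_ioctl_cmd bic = { .fd = bfd };
 *	ioctl(cfd, B_IOCTL_ADD, &bic);		// on success, bic.minor is set
 *	// ... drive I/O through /dev/binject<bic.minor> ...
 *	ioctl(cfd, B_IOCTL_DEL, &bic);		// remove by bic.minor
 */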
static void __exit b_exit(void)
	kmem_cache_destroy(b_slab);
	class_destroy(b_class);
	misc_deregister(&b_misc_dev);
static void __b_cmd_init_once(struct b_cmd *bc)
	INIT_LIST_HEAD(&bc->list);
#ifdef KCOMPAT_OLD_SLAB
static void b_cmd_init_once(void *data, kmem_cache_t *slab, unsigned long flags)
	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
			SLAB_CTOR_CONSTRUCTOR)
		__b_cmd_init_once(data);
#else
static void b_cmd_init_once(void *data)
	__b_cmd_init_once(data);
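/*
 * Slab constructor compatibility: very old kernels passed the cache and
 * constructor flags to the init callback and expected it to act only on
 * SLAB_CTOR_CONSTRUCTOR, while modern kernels hand over just the object
 * pointer. Either way, each b_cmd starts life with an initialized list
 * head, which the BUG_ON() in b_dev_free_command() relies on.
 */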
static int __init b_init(void)
	b_slab = binject_create_slab("binject", sizeof(struct b_cmd),
					SLAB_HWCACHE_ALIGN, b_cmd_init_once);
		printk(KERN_ERR "binject: failed to create cmd slab\n");

	ret = misc_register(&b_misc_dev);

	b_major = register_chrdev(0, "binject", &b_dev_fops);

	b_class = class_create(THIS_MODULE, "binject");

	unregister_chrdev(b_major, "binject");
	misc_deregister(&b_misc_dev);
	kmem_cache_destroy(b_slab);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");