--- /dev/null
+#ifndef BINJECT_KCOMPAT_H
+#define BINJECT_KCOMPAT_H
+
+#include <linux/version.h>
+#include <linux/slab.h>
+
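+/*
+ * Maps the user buffer at uaddr into a bio against bdev for the plain
+ * read/write command types. The implementation is assumed to live in a
+ * separate per-kernel compat source (it is not part of this header);
+ * binject.c undoes the mapping with bio_unmap_user() on completion.
+ */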
+struct bio *binject_map_bio(struct request_queue *q, struct block_device *bdev,
+ unsigned long uaddr, unsigned int len, int to_vm,
+ gfp_t gfp_mask);
+
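+/*
+ * Newer kernels take the drvdata pointer directly in device_create();
+ * the fallback below calls the old device_create() and attaches the
+ * drvdata afterwards.
+ */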
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
+#define binject_device_create device_create
+#else
+#define binject_device_create(a, b, c, d, args...) ( \
+{ \
+ struct device *__dev; \
+ __dev = device_create((a), (b), (c), ##args); \
+ if (!IS_ERR(__dev)) \
+ dev_set_drvdata(__dev, (d)); \
+ __dev; \
+} \
+)
+#endif
+
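+/*
+ * Block size of the target device, used by binject.c to turn a byte
+ * offset into a starting sector for the bio.
+ */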
+static inline unsigned int binject_get_bs(struct request_queue *q)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 18)
+ return queue_physical_block_size(q);
+#else
+ return queue_hardsect_size(q);
+#endif
+}
+
+static inline void binject_mark_bio_write(struct bio *bio)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
+ bio->bi_rw |= REQ_WRITE;
+#else
+ bio->bi_rw |= (1 << BIO_RW);
+#endif
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
+static inline ktime_t ktime_get(void)
+{
+ struct timespec t;
+
+ ktime_get_ts(&t);
+ return timespec_to_ktime(t);
+}
+#endif
+
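+/*
+ * kmem_cache_create() on the older kernels still takes a destructor
+ * argument; hide the difference here.
+ */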
+static inline struct kmem_cache *binject_create_slab(const char *name,
+ size_t obj_size)
+{
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 26)
+ return kmem_cache_create(name, obj_size, 0, 0, NULL);
+#else
+ return kmem_cache_create(name, obj_size, 0, 0, NULL, NULL);
+#endif
+}
+
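+/*
+ * Per-kernel spellings of the bio rw flags. Flags that have no
+ * equivalent on the old kernels are defined to 0 so they OR in as
+ * no-ops.
+ */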
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 35)
+#define B_REQ_WRITE REQ_WRITE
+#define B_REQ_SYNC REQ_SYNC
+#define B_REQ_UNPLUG REQ_UNPLUG
+#define B_REQ_NOIDLE REQ_NOIDLE
+#define B_REQ_HARDBARRIER REQ_HARDBARRIER
+#define B_REQ_META REQ_META
+#define B_REQ_RAHEAD REQ_RAHEAD
+#define B_REQ_FAILFAST_DEV REQ_FAILFAST_DEV
+#define B_REQ_FAILFAST_TRANSPORT REQ_FAILFAST_TRANSPORT
+#define B_REQ_FAILFAST_DRIVER REQ_FAILFAST_DRIVER
+#define B_REQ_DISCARD REQ_DISCARD
+#define B_REQ_FLUSH REQ_FLUSH
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 18)
+#define B_REQ_WRITE (1 << BIO_RW)
+#define B_REQ_SYNC (1 << BIO_RW_SYNC)
+#define B_REQ_UNPLUG 0
+#define B_REQ_NOIDLE 0
+#define B_REQ_HARDBARRIER (1 << BIO_RW_BARRIER)
+#define B_REQ_META 0
+#define B_REQ_RAHEAD (1 << BIO_RW_AHEAD)
+#define B_REQ_FAILFAST_DEV (1 << BIO_RW_FAILFAST)
+#define B_REQ_FAILFAST_TRANSPORT (1 << BIO_RW_FAILFAST)
+#define B_REQ_FAILFAST_DRIVER (1 << BIO_RW_FAILFAST)
+#define B_REQ_DISCARD 0
+#define B_REQ_FLUSH 0
+#else
+#error The kernel is too old
+#endif
+
+#endif
--- /dev/null
+/*
+ * TODO
+ *
+ * - Proper ioctls
+ * - Get rid of device list?
+ */
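+
+/*
+ * Rough userspace flow, sketched here for orientation only; struct
+ * b_ioctl_cmd, struct b_user_cmd, the B_TYPE_ and B_FLAG_ constants
+ * and the command magic checked by binject_buc_check_magic() are all
+ * defined in binject.h, not in this file:
+ *
+ *   int ctl = open("/dev/binject-ctl", O_RDWR);
+ *   struct b_ioctl_cmd bic = { .fd = open("/dev/sdX", O_RDWR) };
+ *   ioctl(ctl, 0, &bic);            raw cmd 0: attach, creates binject<minor>
+ *
+ *   int dev = open("/dev/binject0", O_RDWR);
+ *   struct b_user_cmd uc = { .type = B_TYPE_READ, ... };
+ *   write(dev, &uc, sizeof(uc));    submit; count must be a multiple of
+ *                                   sizeof(struct b_user_cmd)
+ *   read(dev, &uc, sizeof(uc));     reap; ->error and ->nsec are filled in
+ *
+ * poll() is supported as well: POLLIN means a completion is waiting,
+ * POLLOUT is always set since there is no submission-side queue limit.
+ * Note the attach ioctl does not copy the assigned minor back to
+ * userspace (see the ioctl TODO above).
+ */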
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+#include <linux/miscdevice.h>
+#include <linux/cdev.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+
+#include "kcompat.h"
+#include "binject.h"
+
+static LIST_HEAD(b_dev_list);
+static DEFINE_SPINLOCK(b_dev_lock);
+static DEFINE_IDR(b_minor_idr);
+static struct kmem_cache *b_slab;
+static struct class *b_class;
+static int b_major;
+
+#define B_MAX_DEVS 64
+
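+/*
+ * Per-target state, one for each block device attached through the
+ * control ioctl. Looked up by char dev minor, reference counted, and
+ * freed via RCU once the last reference is dropped.
+ */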
+struct b_dev {
+ struct list_head device_list;
+ struct list_head done_list;
+ atomic_t in_flight;
+ unsigned int done_cmds;
+ wait_queue_head_t wq_done;
+ struct block_device *bdev;
+ spinlock_t lock;
+ atomic_t ref;
+ struct file *file;
+ struct device *dev;
+ int minor;
+ struct rcu_head rcu_free;
+};
+
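+/*
+ * One queued command: allocated per b_user_cmd written to the device,
+ * moved to the done_list by the bio completion handler, and handed
+ * back to userspace by read().
+ */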
+struct b_cmd {
+ struct list_head list;
+ struct b_dev *bd;
+ struct bio *bio;
+ struct b_user_cmd cmd;
+ u64 issue_time;
+};
+
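+/*
+ * Translation of user-visible flag bits into bio rw flags; the order
+ * is assumed to match the B_FLAG_ bit numbering in binject.h.
+ */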
+static const unsigned long uc_flag_map[__B_FLAG_NR] = {
+ B_REQ_SYNC,
+ B_REQ_UNPLUG,
+ B_REQ_NOIDLE,
+ B_REQ_HARDBARRIER,
+ B_REQ_META,
+ B_REQ_RAHEAD,
+ B_REQ_FAILFAST_DEV,
+ B_REQ_FAILFAST_TRANSPORT,
+ B_REQ_FAILFAST_DRIVER
+};
+
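+/*
+ * Per-command-type description: whether the command transfers data, in
+ * which direction, whether it is backed by the zero page instead of a
+ * user mapping, and which extra rw flags the bio gets.
+ */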
+struct uc_map {
+ int type;
+ unsigned int data_transfer : 1;
+ unsigned int todevice : 1;
+ unsigned int map_zero : 1;
+ unsigned long rw_flags;
+};
+
+static const struct uc_map uc_map[B_TYPE_NR] = {
+ {
+ .type = B_TYPE_READ,
+ .data_transfer = 1,
+ .todevice = 0,
+ .map_zero = 0,
+ },
+ {
+ .type = B_TYPE_WRITE,
+ .data_transfer = 1,
+ .todevice = 1,
+ .map_zero = 0,
+ .rw_flags = B_REQ_WRITE,
+ },
+ {
+ .type = B_TYPE_DISCARD,
+ .data_transfer = 0,
+ .todevice = 0,
+ .map_zero = 0,
+ .rw_flags = B_REQ_DISCARD | B_REQ_WRITE,
+ },
+ {
+ .type = B_TYPE_READVOID,
+ .data_transfer = 1,
+ .todevice = 0,
+ .map_zero = 1,
+ },
+ {
+ .type = B_TYPE_WRITEZERO,
+ .data_transfer = 1,
+ .todevice = 1,
+ .map_zero = 1,
+ .rw_flags = B_REQ_WRITE,
+ },
+ {
+ .type = B_TYPE_READBARRIER,
+ .data_transfer = 1,
+ .todevice = 0,
+ .map_zero = 0,
+ .rw_flags = B_REQ_HARDBARRIER,
+ },
+ {
+ .type = B_TYPE_WRITEBARRIER,
+ .data_transfer = 1,
+ .todevice = 1,
+ .map_zero = 0,
+ .rw_flags = B_REQ_HARDBARRIER | B_REQ_FLUSH | B_REQ_WRITE,
+ }
+};
+
+static void b_dev_complete_commands(struct b_dev *bd);
+
+static void b_dev_remove_lookup(struct b_dev *bd)
+{
+ if (!list_empty(&bd->device_list)) {
+ list_del_init(&bd->device_list);
+ idr_remove(&b_minor_idr, bd->minor);
+ }
+}
+
+static void bd_rcu_free(struct rcu_head *head)
+{
+ kfree(container_of(head, struct b_dev, rcu_free));
+}
+
+static void b_dev_put(struct b_dev *bd)
+{
+ if (!atomic_dec_and_test(&bd->ref))
+ return;
+
+ spin_lock(&b_dev_lock);
+ b_dev_remove_lookup(bd);
+ spin_unlock(&b_dev_lock);
+
+ b_dev_complete_commands(bd);
+
+ device_destroy(b_class, MKDEV(b_major, bd->minor));
+ fput(bd->file);
+ module_put(THIS_MODULE);
+
+ call_rcu(&bd->rcu_free, bd_rcu_free);
+}
+
+static struct b_cmd *get_free_command(struct b_dev *bd)
+{
+ struct b_cmd *bc;
+
+ bc = kmem_cache_alloc(b_slab, GFP_KERNEL);
+ if (bc) {
+ memset(bc, 0, sizeof(*bc));
+ INIT_LIST_HEAD(&bc->list);
+ bc->bd = bd;
+ return bc;
+ }
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static struct b_cmd *get_completed_command(struct b_dev *bd)
+{
+ struct b_cmd *bc = NULL;
+
+ spin_lock_irq(&bd->lock);
+ if (!list_empty(&bd->done_list)) {
+ bc = list_entry(bd->done_list.next, struct b_cmd, list);
+ bd->done_cmds--;
+ list_del(&bc->list);
+ }
+ spin_unlock_irq(&bd->lock);
+ return bc;
+}
+
+static struct b_cmd *get_done_command(struct b_dev *bd, int block)
+{
+ struct b_cmd *bc;
+ int ret;
+
+ do {
+ bc = get_completed_command(bd);
+ if (bc)
+ break;
+
+ if (!block)
+ break;
+
+ ret = wait_event_interruptible(bd->wq_done, bd->done_cmds);
+ if (ret) {
+ bc = ERR_PTR(-ERESTARTSYS);
+ break;
+ }
+ } while (1);
+
+ return bc;
+}
+
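+/*
+ * Free the pages allocated in zero_map_bio(), skipping the shared zero
+ * page used for the write-zero case.
+ */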
+static void bc_put_bio_pages(struct bio *bio)
+{
+ struct bio_vec *bv;
+ unsigned int i;
+
+ __bio_for_each_segment(bv, bio, i, 0) {
+ if (bv->bv_page != ZERO_PAGE(0))
+ __free_page(bv->bv_page);
+ }
+}
+
+static void complete_and_free_bio(struct b_cmd *bc)
+{
+ if (bc->bio) {
+ const struct uc_map *ucm = &uc_map[bc->cmd.type];
+
+ if (ucm->data_transfer) {
+ if (!ucm->map_zero)
+ bio_unmap_user(bc->bio);
+ else
+ bc_put_bio_pages(bc->bio);
+ }
+ bio_put(bc->bio);
+ bc->bio = NULL;
+ }
+}
+
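+/*
+ * Final-put teardown: wait until nothing is in flight, then reap and
+ * free everything left on the done list.
+ */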
+static void b_dev_complete_commands(struct b_dev *bd)
+{
+ struct b_cmd *bc;
+
+ wait_event(bd->wq_done, !atomic_read(&bd->in_flight));
+
+ while ((bc = get_completed_command(bd)) != NULL)
+ complete_and_free_bio(bc);
+}
+
+static int b_dev_validate_command(struct b_user_cmd *buc)
+{
+ if (!binject_buc_check_magic(buc))
+ return -EINVAL;
+
+ switch (buc->type) {
+ case B_TYPE_WRITE:
+ case B_TYPE_READ:
+ case B_TYPE_DISCARD:
+ case B_TYPE_READVOID:
+ case B_TYPE_WRITEZERO:
+ case B_TYPE_READBARRIER:
+ case B_TYPE_WRITEBARRIER:
+ if (buc->len)
+ return 0;
+ return -EINVAL;
+ default:
+ return -EINVAL;
+ }
+}
+
+static void b_cmd_endio(struct bio *bio, int error)
+{
+ struct b_cmd *bc = bio->bi_private;
+ struct b_dev *bd = bc->bd;
+ unsigned long flags;
+ u64 now;
+
+ now = ktime_to_ns(ktime_get());
+ bc->cmd.nsec = now - bc->issue_time;
+ bc->cmd.error = error;
+
+ spin_lock_irqsave(&bd->lock, flags);
+ list_add_tail(&bc->list, &bd->done_list);
+ bd->done_cmds++;
+ spin_unlock_irqrestore(&bd->lock, flags);
+
+ atomic_dec(&bd->in_flight);
+
+ wake_up(&bd->wq_done);
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
+static int bio_cmd_endio(struct bio *bio, unsigned int bytes, int err)
+{
+ if (bio->bi_size)
+ return 1;
+
+ b_cmd_endio(bio, err);
+ return 0;
+}
+#else
+static void bio_cmd_endio(struct bio *bio, int err)
+{
+ b_cmd_endio(bio, err);
+}
+#endif
+
+#define len_to_pages(len) (((len) + PAGE_SIZE - 1) / PAGE_SIZE)
+
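+/*
+ * Build up the bio for the READVOID/WRITEZERO types: writes are backed
+ * by the shared zero page, reads by freshly allocated throwaway pages.
+ */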
+static int zero_map_bio(struct request_queue *q, struct bio *bio,
+ const struct uc_map *ucm, unsigned int len)
+{
+ unsigned int i, nr_pages, this_len;
+ int ret, err;
+ struct page *page;
+
+ nr_pages = len_to_pages(len);
+ for (i = 0; i < nr_pages; i++) {
+ if (ucm->todevice)
+ page = ZERO_PAGE(0);
+ else {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto oom;
+ }
+ }
+
+ this_len = PAGE_SIZE;
+ if (this_len > len)
+ this_len = len;
+
+ ret = bio_add_pc_page(q, bio, page, this_len, 0);
+ if (ret < this_len) {
+ /* this page never made it into the bio, so bc_put_bio_pages() won't see it */
+ if (page != ZERO_PAGE(0))
+ __free_page(page);
+ err = -E2BIG;
+ goto oom;
+ }
+ }
+ return 0;
+oom:
+ bc_put_bio_pages(bio);
+ return err;
+}
+
+static void map_uc_to_bio_flags(struct bio *bio, struct b_user_cmd *uc)
+{
+ unsigned int i;
+
+ for (i = 0; i < __B_FLAG_NR; i++) {
+ unsigned long mask;
+
+ if (uc->flags & (1UL << i))
+ bio->bi_rw |= uc_flag_map[i];
+
+ mask = ~((1UL << i) - 1);
+ if (!(mask & uc->flags))
+ break;
+ }
+}
+
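+/*
+ * Turn a user command into a bio: either map the user buffer, or build
+ * a zero/throwaway-page bio, then apply the starting sector and flags.
+ */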
+static struct bio *map_uc_to_bio(struct b_dev *bd, struct b_user_cmd *uc)
+{
+ struct request_queue *q = bdev_get_queue(bd->bdev);
+ const struct uc_map *ucm = &uc_map[uc->type];
+ struct bio *bio;
+
+ if (ucm->data_transfer && !ucm->map_zero) {
+ bio = binject_map_bio(q, bd->bdev, uc->buf, uc->len,
+ !ucm->todevice, GFP_KERNEL);
+ } else {
+ bio = bio_alloc(GFP_KERNEL, len_to_pages(uc->len));
+ if (bio) {
+ bio->bi_bdev = bd->bdev;
+ if (ucm->map_zero && uc->len) {
+ int err;
+
+ err = zero_map_bio(q, bio, ucm, uc->len);
+ if (err) {
+ bio_put(bio);
+ bio = ERR_PTR(err);
+ }
+ } else
+ bio->bi_size = uc->len;
+ }
+ }
+
+ if (!bio)
+ bio = ERR_PTR(-ENOMEM);
+ else if (!IS_ERR(bio)) {
+ map_uc_to_bio_flags(bio, uc);
+ bio->bi_sector = uc->offset / binject_get_bs(q);
+ bio->bi_rw |= ucm->rw_flags;
+ }
+
+ return bio;
+}
+
+static int b_dev_add_command(struct b_dev *bd, struct b_cmd *bc)
+{
+ struct b_user_cmd *uc = &bc->cmd;
+ struct bio *bio;
+
+ bio = map_uc_to_bio(bd, uc);
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ bio_get(bio);
+ bc->bio = bio;
+
+ bio->bi_end_io = bio_cmd_endio;
+ bio->bi_private = bc;
+
+ bc->issue_time = ktime_to_ns(ktime_get());
+
+ atomic_inc(&bd->in_flight);
+ submit_bio(bio->bi_rw, bio);
+ return 0;
+}
+
+static void b_dev_free_command(struct b_dev *bd, struct b_cmd *bc)
+{
+ kmem_cache_free(b_slab, bc);
+}
+
+/*
+ * We are always writable, as we have an infinite queue depth
+ */
+static unsigned int b_dev_poll(struct file *file, poll_table *wait)
+{
+ struct b_dev *bd = file->private_data;
+ unsigned int mask = POLLOUT;
+
+ poll_wait(file, &bd->wq_done, wait);
+
+ spin_lock_irq(&bd->lock);
+ if (!list_empty(&bd->done_list))
+ mask |= POLLIN | POLLRDNORM;
+ spin_unlock_irq(&bd->lock);
+
+ return mask;
+}
+
+static int b_dev_release(struct inode *inode, struct file *file)
+{
+ struct b_dev *bd = file->private_data;
+
+ b_dev_put(bd);
+ return 0;
+}
+
+static struct b_dev *b_dev_lookup(int minor)
+{
+ struct b_dev *bd;
+
+ rcu_read_lock();
+
+ bd = idr_find(&b_minor_idr, minor);
+ if (bd && !atomic_inc_not_zero(&bd->ref))
+ bd = NULL;
+
+ rcu_read_unlock();
+ return bd;
+}
+
+static int b_dev_open(struct inode *inode, struct file *file)
+{
+ struct b_dev *bd;
+
+ bd = b_dev_lookup(iminor(inode));
+ if (!bd)
+ return -ENODEV;
+
+ file->private_data = bd;
+ return 0;
+}
+
+static ssize_t b_dev_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct b_dev *bd = file->private_data;
+ struct b_cmd *bc = NULL;
+ unsigned int total;
+ ssize_t done = 0;
+ int err = 0;
+
+ if (count % sizeof(struct b_user_cmd))
+ return -EINVAL;
+
+ total = count / sizeof(struct b_user_cmd);
+ while (total) {
+ bc = get_free_command(bd);
+ if (IS_ERR(bc)) {
+ err = PTR_ERR(bc);
+ bc = NULL;
+ break;
+ }
+
+ if (copy_from_user(&bc->cmd, buf, sizeof(struct b_user_cmd))) {
+ err = -EFAULT;
+ break;
+ }
+
+ err = b_dev_validate_command(&bc->cmd);
+ if (err)
+ break;
+
+ err = b_dev_add_command(bd, bc);
+ if (err)
+ break;
+
+ done += sizeof(struct b_user_cmd);
+ buf += sizeof(struct b_user_cmd);
+ total--;
+ bc = NULL;
+ }
+
+ if (bc)
+ b_dev_free_command(bd, bc);
+
+ *ppos = done;
+ if (!done)
+ done = err;
+
+ return done;
+}
+
+static ssize_t b_dev_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct b_dev *bd = file->private_data;
+ unsigned int total;
+ ssize_t done = 0;
+ int err = 0;
+
+ if (count % sizeof(struct b_user_cmd))
+ return -EINVAL;
+
+ total = count / sizeof(struct b_user_cmd);
+ while (total) {
+ struct b_cmd *bc;
+
+ bc = get_done_command(bd, !(file->f_flags & O_NONBLOCK));
+ if (IS_ERR(bc)) {
+ err = PTR_ERR(bc);
+ break;
+ }
+
+ complete_and_free_bio(bc);
+
+ if (copy_to_user(buf, &bc->cmd, sizeof(bc->cmd)))
+ err = -EFAULT;
+
+ b_dev_free_command(bd, bc);
+
+ if (err)
+ break;
+
+ done += sizeof(struct b_user_cmd);
+ buf += sizeof(struct b_user_cmd);
+ total--;
+ }
+
+ *ppos = done;
+ if (!done)
+ done = err;
+
+ return done;
+}
+
+static const struct file_operations b_dev_fops = {
+ .open = b_dev_open,
+ .release = b_dev_release,
+ .read = b_dev_read,
+ .write = b_dev_write,
+ .poll = b_dev_poll,
+ .owner = THIS_MODULE,
+};
+
+static int b_del_dev(struct b_ioctl_cmd *bic)
+{
+ struct b_dev *bd;
+
+ bd = b_dev_lookup(bic->minor);
+ if (bd) {
+ spin_lock(&b_dev_lock);
+ b_dev_remove_lookup(bd);
+ spin_unlock(&b_dev_lock);
+
+ /*
+ * Our lookup grabbed a reference, drop two
+ */
+ b_dev_put(bd);
+ b_dev_put(bd);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
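+/*
+ * Attach the block device behind the passed-in fd: allocate per-target
+ * state, assign a minor and create the binject<minor> class device.
+ */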
+static int b_add_dev(struct b_ioctl_cmd *bic)
+{
+ struct inode *inode;
+ struct file *file;
+ struct b_dev *bd;
+ int ret;
+
+ file = fget(bic->fd);
+ if (!file)
+ return -EBADF;
+
+ __module_get(THIS_MODULE);
+
+ inode = file->f_mapping->host;
+ if (!S_ISBLK(inode->i_mode)) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = idr_pre_get(&b_minor_idr, GFP_KERNEL);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ bd = kzalloc(sizeof(*bd), GFP_KERNEL);
+ if (!bd) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ atomic_set(&bd->ref, 1);
+ spin_lock_init(&bd->lock);
+ INIT_LIST_HEAD(&bd->done_list);
+ init_waitqueue_head(&bd->wq_done);
+ bd->file = file;
+ bd->bdev = inode->i_bdev;
+
+ spin_lock(&b_dev_lock);
+
+ ret = idr_get_new(&b_minor_idr, bd, &bd->minor);
+ if (ret < 0)
+ goto out_unlock;
+
+ if (bd->minor >= B_MAX_DEVS) {
+ ret = -ENOSPC;
+ goto out_idr;
+ }
+
+ spin_unlock(&b_dev_lock);
+
+ INIT_LIST_HEAD(&bd->device_list);
+ bd->dev = binject_device_create(b_class, NULL,
+ MKDEV(b_major, bd->minor), bd, "binject%d", bd->minor);
+
+ spin_lock(&b_dev_lock);
+
+ if (IS_ERR(bd->dev)) {
+ ret = PTR_ERR(bd->dev);
+ goto out_idr;
+ }
+
+ list_add_tail(&bd->device_list, &b_dev_list);
+ spin_unlock(&b_dev_lock);
+ return 0;
+out_idr:
+ idr_remove(&b_minor_idr, bd->minor);
+out_unlock:
+ spin_unlock(&b_dev_lock);
+ kfree(bd);
+out_put:
+ fput(file);
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static long b_misc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int __user *uarg = (int __user *) arg;
+ struct b_ioctl_cmd bic;
+
+ if (copy_from_user(&bic, uarg, sizeof(bic)))
+ return -EFAULT;
+
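+ /* raw 0/1 command numbers for now, see the "Proper ioctls" TODO above */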
+ switch (cmd) {
+ case 0:
+ return b_add_dev(&bic);
+ case 1:
+ return b_del_dev(&bic);
+ default:
+ break;
+ }
+
+ return -ENOTTY;
+}
+
+static const struct file_operations b_misc_fops = {
+ .unlocked_ioctl = b_misc_ioctl,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice b_misc_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "binject-ctl",
+ .fops = &b_misc_fops,
+};
+
+static void __exit b_exit(void)
+{
+ synchronize_rcu();
+ kmem_cache_destroy(b_slab);
+ class_destroy(b_class);
+ unregister_chrdev(b_major, "binject");
+ misc_deregister(&b_misc_dev);
+}
+
+static int __init b_init(void)
+{
+ int ret;
+
+ b_slab = binject_create_slab("binject", sizeof(struct b_cmd));
+ if (!b_slab) {
+ printk(KERN_ERR "binject: failed to create cmd slab\n");
+ return -ENOMEM;
+ }
+
+ ret = misc_register(&b_misc_dev);
+ if (ret < 0)
+ goto fail_misc;
+
+ b_major = register_chrdev(0, "binject", &b_dev_fops);
+ if (b_major < 0) {
+ ret = b_major;
+ goto fail_chr;
+ }
+
+ b_class = class_create(THIS_MODULE, "binject");
+ if (IS_ERR(b_class)) {
+ ret = PTR_ERR(b_class);
+ goto fail_class;
+ }
+
+ return 0;
+fail_class:
+ unregister_chrdev(b_major, "binject");
+fail_chr:
+ misc_deregister(&b_misc_dev);
+fail_misc:
+ kmem_cache_destroy(b_slab);
+ return ret;
+}
+
+module_init(b_init);
+module_exit(b_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");