ublk: return -EINTR if interrupted while waiting for existing users in DEL_DEV
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 33d3298a0da16f1f70ecc87495f77746f2e6a4af..21d2e71c5514df82874362589f211df1e6c25af3 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -43,6 +43,7 @@
 #include <asm/page.h>
 #include <linux/task_work.h>
 #include <linux/namei.h>
+#include <linux/kref.h>
 #include <uapi/linux/ublk_cmd.h>
 
 #define UBLK_MINORS            (1U << MINORBITS)
@@ -54,7 +55,8 @@
                | UBLK_F_USER_RECOVERY \
                | UBLK_F_USER_RECOVERY_REISSUE \
                | UBLK_F_UNPRIVILEGED_DEV \
-               | UBLK_F_CMD_IOCTL_ENCODE)
+               | UBLK_F_CMD_IOCTL_ENCODE \
+               | UBLK_F_USER_COPY)
 
 /* All UBLK_PARAM_TYPE_* should be included here */
 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
@@ -62,7 +64,8 @@
 
 struct ublk_rq_data {
        struct llist_node node;
-       struct callback_head work;
+
+       struct kref ref;
 };
 
 struct ublk_uring_cmd_pdu {
@@ -182,8 +185,13 @@ struct ublk_params_header {
        __u32   types;
 };
 
+static inline void __ublk_complete_rq(struct request *req);
+static void ublk_complete_rq(struct kref *ref);
+
 static dev_t ublk_chr_devt;
-static struct class *ublk_chr_class;
+static const struct class ublk_chr_class = {
+       .name = "ublk-char",
+};
 
 static DEFINE_IDR(ublk_index_idr);
 static DEFINE_SPINLOCK(ublk_idr_lock);
@@ -202,6 +210,23 @@ static unsigned int ublks_added;   /* protected by ublk_ctl_mutex */
 
 static struct miscdevice ublk_misc;
 
+static inline unsigned ublk_pos_to_hwq(loff_t pos)
+{
+       return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_QID_OFF) &
+               UBLK_QID_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_buf_off(loff_t pos)
+{
+       return (pos - UBLKSRV_IO_BUF_OFFSET) & UBLK_IO_BUF_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_tag(loff_t pos)
+{
+       return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_TAG_OFF) &
+               UBLK_TAG_BITS_MASK;
+}
+
 static void ublk_dev_param_basic_apply(struct ublk_device *ub)
 {
        struct request_queue *q = ub->ub_disk->queue;
@@ -290,12 +315,52 @@ static int ublk_apply_params(struct ublk_device *ub)
        return 0;
 }
 
-static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
+static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
 {
-       if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
-                       !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
-               return true;
-       return false;
+       return ubq->flags & UBLK_F_USER_COPY;
+}
+
+static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
+{
+       /*
+        * read()/write() is involved in user copy, so request reference
+        * has to be grabbed
+        */
+       return ublk_support_user_copy(ubq);
+}
+
+static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
+               struct request *req)
+{
+       if (ublk_need_req_ref(ubq)) {
+               struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+               kref_init(&data->ref);
+       }
+}
+
+static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
+               struct request *req)
+{
+       if (ublk_need_req_ref(ubq)) {
+               struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+               return kref_get_unless_zero(&data->ref);
+       }
+
+       return true;
+}
+
+static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
+               struct request *req)
+{
+       if (ublk_need_req_ref(ubq)) {
+               struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+               kref_put(&data->ref, ublk_complete_rq);
+       } else {
+               __ublk_complete_rq(req);
+       }
 }
 
 static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
@@ -384,9 +449,9 @@ static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
        *owner_gid = from_kgid(&init_user_ns, gid);
 }
 
-static int ublk_open(struct block_device *bdev, fmode_t mode)
+static int ublk_open(struct gendisk *disk, blk_mode_t mode)
 {
-       struct ublk_device *ub = bdev->bd_disk->private_data;
+       struct ublk_device *ub = disk->private_data;
 
        if (capable(CAP_SYS_ADMIN))
                return 0;
@@ -421,49 +486,39 @@ static const struct block_device_operations ub_fops = {
 
 #define UBLK_MAX_PIN_PAGES     32
 
-struct ublk_map_data {
-       const struct request *rq;
-       unsigned long   ubuf;
-       unsigned int    len;
-};
-
 struct ublk_io_iter {
        struct page *pages[UBLK_MAX_PIN_PAGES];
-       unsigned pg_off;        /* offset in the 1st page in pages */
-       int nr_pages;           /* how many page pointers in pages */
        struct bio *bio;
        struct bvec_iter iter;
 };
 
-static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
-               unsigned max_bytes, bool to_vm)
+/* copy 'total' bytes between the pinned pages and the request's bio vecs */
+static void ublk_copy_io_pages(struct ublk_io_iter *data,
+               size_t total, size_t pg_off, int dir)
 {
-       const unsigned total = min_t(unsigned, max_bytes,
-                       PAGE_SIZE - data->pg_off +
-                       ((data->nr_pages - 1) << PAGE_SHIFT));
        unsigned done = 0;
        unsigned pg_idx = 0;
 
        while (done < total) {
                struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
-               const unsigned int bytes = min3(bv.bv_len, total - done,
-                               (unsigned)(PAGE_SIZE - data->pg_off));
+               unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
+                               (unsigned)(PAGE_SIZE - pg_off));
                void *bv_buf = bvec_kmap_local(&bv);
                void *pg_buf = kmap_local_page(data->pages[pg_idx]);
 
-               if (to_vm)
-                       memcpy(pg_buf + data->pg_off, bv_buf, bytes);
+               if (dir == ITER_DEST)
+                       memcpy(pg_buf + pg_off, bv_buf, bytes);
                else
-                       memcpy(bv_buf, pg_buf + data->pg_off, bytes);
+                       memcpy(bv_buf, pg_buf + pg_off, bytes);
 
                kunmap_local(pg_buf);
                kunmap_local(bv_buf);
 
                /* advance page array */
-               data->pg_off += bytes;
-               if (data->pg_off == PAGE_SIZE) {
+               pg_off += bytes;
+               if (pg_off == PAGE_SIZE) {
                        pg_idx += 1;
-                       data->pg_off = 0;
+                       pg_off = 0;
                }
 
                done += bytes;
@@ -477,41 +532,58 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
                        data->iter = data->bio->bi_iter;
                }
        }
+}
 
-       return done;
+static bool ublk_advance_io_iter(const struct request *req,
+               struct ublk_io_iter *iter, unsigned int offset)
+{
+       struct bio *bio = req->bio;
+
+       for_each_bio(bio) {
+               if (bio->bi_iter.bi_size > offset) {
+                       iter->bio = bio;
+                       iter->iter = bio->bi_iter;
+                       bio_advance_iter(iter->bio, &iter->iter, offset);
+                       return true;
+               }
+               offset -= bio->bi_iter.bi_size;
+       }
+       return false;
 }
 
-static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm)
+/*
+ * Copy data between request pages and io_iter; 'offset' is the byte
+ * offset from the start of the request at which copying begins.
+ */
+static size_t ublk_copy_user_pages(const struct request *req,
+               unsigned offset, struct iov_iter *uiter, int dir)
 {
-       const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
-       const unsigned long start_vm = data->ubuf;
-       unsigned int done = 0;
-       struct ublk_io_iter iter = {
-               .pg_off = start_vm & (PAGE_SIZE - 1),
-               .bio    = data->rq->bio,
-               .iter   = data->rq->bio->bi_iter,
-       };
-       const unsigned int nr_pages = round_up(data->len +
-                       (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
-
-       while (done < nr_pages) {
-               const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES,
-                               nr_pages - done);
-               unsigned i, len;
-
-               iter.nr_pages = get_user_pages_fast(start_vm +
-                               (done << PAGE_SHIFT), to_pin, gup_flags,
-                               iter.pages);
-               if (iter.nr_pages <= 0)
-                       return done == 0 ? iter.nr_pages : done;
-               len = ublk_copy_io_pages(&iter, data->len, to_vm);
-               for (i = 0; i < iter.nr_pages; i++) {
-                       if (to_vm)
+       struct ublk_io_iter iter;
+       size_t done = 0;
+
+       if (!ublk_advance_io_iter(req, &iter, offset))
+               return 0;
+
+       while (iov_iter_count(uiter) && iter.bio) {
+               unsigned nr_pages;
+               ssize_t len;
+               size_t off;
+               int i;
+
+               len = iov_iter_get_pages2(uiter, iter.pages,
+                               iov_iter_count(uiter),
+                               UBLK_MAX_PIN_PAGES, &off);
+               if (len <= 0)
+                       return done;
+
+               ublk_copy_io_pages(&iter, len, off, dir);
+               nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
+               for (i = 0; i < nr_pages; i++) {
+                       if (dir == ITER_DEST)
                                set_page_dirty(iter.pages[i]);
                        put_page(iter.pages[i]);
                }
-               data->len -= len;
-               done += iter.nr_pages;
+               done += len;
        }
 
        return done;
@@ -532,21 +604,23 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
 {
        const unsigned int rq_bytes = blk_rq_bytes(req);
 
+       if (ublk_support_user_copy(ubq))
+               return rq_bytes;
+
        /*
         * no zero copy, we delay copy WRITE request data into ublksrv
         * context and the big benefit is that pinning pages in current
         * context is pretty fast, see ublk_pin_user_pages
         */
        if (ublk_need_map_req(req)) {
-               struct ublk_map_data data = {
-                       .rq     =       req,
-                       .ubuf   =       io->addr,
-                       .len    =       rq_bytes,
-               };
+               struct iov_iter iter;
+               struct iovec iov;
+               const int dir = ITER_DEST;
 
-               ublk_copy_user_pages(&data, true);
+               import_single_range(dir, u64_to_user_ptr(io->addr), rq_bytes,
+                               &iov, &iter);
 
-               return rq_bytes - data.len;
+               return ublk_copy_user_pages(req, 0, &iter, dir);
        }
        return rq_bytes;
 }
@@ -557,18 +631,19 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
 {
        const unsigned int rq_bytes = blk_rq_bytes(req);
 
+       if (ublk_support_user_copy(ubq))
+               return rq_bytes;
+
        if (ublk_need_unmap_req(req)) {
-               struct ublk_map_data data = {
-                       .rq     =       req,
-                       .ubuf   =       io->addr,
-                       .len    =       io->res,
-               };
+               struct iov_iter iter;
+               struct iovec iov;
+               const int dir = ITER_SOURCE;
 
                WARN_ON_ONCE(io->res > rq_bytes);
 
-               ublk_copy_user_pages(&data, false);
-
-               return io->res - data.len;
+               import_single_range(dir, u64_to_user_ptr(io->addr), io->res,
+                               &iov, &iter);
+               return ublk_copy_user_pages(req, 0, &iter, dir);
        }
        return rq_bytes;
 }
@@ -648,13 +723,19 @@ static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
 }
 
 /* todo: handle partial completion */
-static void ublk_complete_rq(struct request *req)
+static inline void __ublk_complete_rq(struct request *req)
 {
        struct ublk_queue *ubq = req->mq_hctx->driver_data;
        struct ublk_io *io = &ubq->ios[req->tag];
        unsigned int unmapped_bytes;
        blk_status_t res = BLK_STS_OK;
 
+       /* called from ublk_abort_queue() code path */
+       if (io->flags & UBLK_IO_FLAG_ABORTED) {
+               res = BLK_STS_IOERR;
+               goto exit;
+       }
+
        /* failed read IO if nothing is read */
        if (!io->res && req_op(req) == REQ_OP_READ)
                io->res = -EIO;
@@ -694,6 +775,15 @@ exit:
        blk_mq_end_request(req, res);
 }
 
+static void ublk_complete_rq(struct kref *ref)
+{
+       struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+                       ref);
+       struct request *req = blk_mq_rq_from_pdu(data);
+
+       __ublk_complete_rq(req);
+}
+
 /*
  * Since __ublk_rq_task_work always fails requests immediately during
  * exiting, __ublk_fail_req() is only called from abort context during
@@ -712,7 +802,7 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
                if (ublk_queue_can_use_recovery_reissue(ubq))
                        blk_mq_requeue_request(req, false);
                else
-                       blk_mq_end_request(req, BLK_STS_IOERR);
+                       ublk_put_req_ref(ubq, req);
        }
 }
 
@@ -821,6 +911,7 @@ static inline void __ublk_rq_task_work(struct request *req,
                        mapped_bytes >> 9;
        }
 
+       ublk_init_req_ref(ubq, req);
        ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
 }
 
@@ -852,17 +943,6 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
        ublk_forward_io_cmds(ubq, issue_flags);
 }
 
-static void ublk_rq_task_work_fn(struct callback_head *work)
-{
-       struct ublk_rq_data *data = container_of(work,
-                       struct ublk_rq_data, work);
-       struct request *req = blk_mq_rq_from_pdu(data);
-       struct ublk_queue *ubq = req->mq_hctx->driver_data;
-       unsigned issue_flags = IO_URING_F_UNLOCKED;
-
-       ublk_forward_io_cmds(ubq, issue_flags);
-}
-
 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 {
        struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
@@ -886,10 +966,6 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
         */
        if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
                ublk_abort_io_cmds(ubq);
-       } else if (ublk_can_use_task_work(ubq)) {
-               if (task_work_add(ubq->ubq_daemon, &data->work,
-                                       TWA_SIGNAL_NO_IPI))
-                       ublk_abort_io_cmds(ubq);
        } else {
                struct io_uring_cmd *cmd = io->cmd;
                struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
@@ -961,19 +1037,9 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
        return 0;
 }
 
-static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
-               unsigned int hctx_idx, unsigned int numa_node)
-{
-       struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
-       init_task_work(&data->work, ublk_rq_task_work_fn);
-       return 0;
-}
-
 static const struct blk_mq_ops ublk_mq_ops = {
        .queue_rq       = ublk_queue_rq,
        .init_hctx      = ublk_init_hctx,
-       .init_request   = ublk_init_rq,
        .timeout        = ublk_timeout,
 };
 
@@ -1050,7 +1116,7 @@ static void ublk_commit_completion(struct ublk_device *ub,
        req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
 
        if (req && likely(!blk_should_fake_timeout(req->q)))
-               ublk_complete_rq(req);
+               ublk_put_req_ref(ubq, req);
 }
 
 /*
@@ -1295,6 +1361,14 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
        return 0;
 }
 
+static inline void ublk_fill_io_cmd(struct ublk_io *io,
+               struct io_uring_cmd *cmd, unsigned long buf_addr)
+{
+       io->cmd = cmd;
+       io->flags |= UBLK_IO_FLAG_ACTIVE;
+       io->addr = buf_addr;
+}
+
 static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                               unsigned int issue_flags,
                               const struct ublksrv_io_cmd *ub_cmd)
@@ -1340,6 +1414,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                        ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
                goto out;
 
+       if (ublk_support_user_copy(ubq) && ub_cmd->addr) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        ret = ublk_check_cmd_op(cmd_op);
        if (ret)
                goto out;
@@ -1358,36 +1437,41 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                 */
                if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
                        goto out;
-               /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
-               if (!ub_cmd->addr && !ublk_need_get_data(ubq))
-                       goto out;
-               io->cmd = cmd;
-               io->flags |= UBLK_IO_FLAG_ACTIVE;
-               io->addr = ub_cmd->addr;
 
+               if (!ublk_support_user_copy(ubq)) {
+                       /*
+                        * FETCH_RQ has to provide IO buffer if NEED GET
+                        * DATA is not enabled
+                        */
+                       if (!ub_cmd->addr && !ublk_need_get_data(ubq))
+                               goto out;
+               }
+
+               ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
                ublk_mark_io_ready(ub, ubq);
                break;
        case UBLK_IO_COMMIT_AND_FETCH_REQ:
                req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
-               /*
-                * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
-                * not enabled or it is Read IO.
-                */
-               if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
-                       goto out;
+
                if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
                        goto out;
-               io->addr = ub_cmd->addr;
-               io->flags |= UBLK_IO_FLAG_ACTIVE;
-               io->cmd = cmd;
+
+               if (!ublk_support_user_copy(ubq)) {
+                       /*
+                        * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+                        * NEED GET DATA is not enabled or it is Read IO.
+                        */
+                       if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+                                               req_op(req) == REQ_OP_READ))
+                               goto out;
+               }
+               ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
                ublk_commit_completion(ub, ub_cmd);
                break;
        case UBLK_IO_NEED_GET_DATA:
                if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
                        goto out;
-               io->addr = ub_cmd->addr;
-               io->cmd = cmd;
-               io->flags |= UBLK_IO_FLAG_ACTIVE;
+               ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
                ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
                break;
        default:
@@ -1402,6 +1486,36 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
        return -EIOCBQUEUED;
 }
 
+static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
+               struct ublk_queue *ubq, int tag, size_t offset)
+{
+       struct request *req;
+
+       if (!ublk_need_req_ref(ubq))
+               return NULL;
+
+       req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+       if (!req)
+               return NULL;
+
+       if (!ublk_get_req_ref(ubq, req))
+               return NULL;
+
+       if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
+               goto fail_put;
+
+       if (!ublk_rq_has_data(req))
+               goto fail_put;
+
+       if (offset > blk_rq_bytes(req))
+               goto fail_put;
+
+       return req;
+fail_put:
+       ublk_put_req_ref(ubq, req);
+       return NULL;
+}
+
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
        /*
@@ -1419,11 +1533,112 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
        return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
 }
 
+static inline bool ublk_check_ubuf_dir(const struct request *req,
+               int ubuf_dir)
+{
+       /* copy ubuf to request pages */
+       if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
+               return true;
+
+       /* copy request pages to ubuf */
+       if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
+               return true;
+
+       return false;
+}
+
+static struct request *ublk_check_and_get_req(struct kiocb *iocb,
+               struct iov_iter *iter, size_t *off, int dir)
+{
+       struct ublk_device *ub = iocb->ki_filp->private_data;
+       struct ublk_queue *ubq;
+       struct request *req;
+       size_t buf_off;
+       u16 tag, q_id;
+
+       if (!ub)
+               return ERR_PTR(-EACCES);
+
+       if (!user_backed_iter(iter))
+               return ERR_PTR(-EACCES);
+
+       if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+               return ERR_PTR(-EACCES);
+
+       tag = ublk_pos_to_tag(iocb->ki_pos);
+       q_id = ublk_pos_to_hwq(iocb->ki_pos);
+       buf_off = ublk_pos_to_buf_off(iocb->ki_pos);
+
+       if (q_id >= ub->dev_info.nr_hw_queues)
+               return ERR_PTR(-EINVAL);
+
+       ubq = ublk_get_queue(ub, q_id);
+       if (!ubq)
+               return ERR_PTR(-EINVAL);
+
+       if (tag >= ubq->q_depth)
+               return ERR_PTR(-EINVAL);
+
+       req = __ublk_check_and_get_req(ub, ubq, tag, buf_off);
+       if (!req)
+               return ERR_PTR(-EINVAL);
+
+       if (!req->mq_hctx || !req->mq_hctx->driver_data)
+               goto fail;
+
+       if (!ublk_check_ubuf_dir(req, dir))
+               goto fail;
+
+       *off = buf_off;
+       return req;
+fail:
+       ublk_put_req_ref(ubq, req);
+       return ERR_PTR(-EACCES);
+}
+
+static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct ublk_queue *ubq;
+       struct request *req;
+       size_t buf_off;
+       size_t ret;
+
+       req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST);
+       ubq = req->mq_hctx->driver_data;
+       ublk_put_req_ref(ubq, req);
+
+       return ret;
+}
+
+static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct ublk_queue *ubq;
+       struct request *req;
+       size_t buf_off;
+       size_t ret;
+
+       req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE);
+       ubq = req->mq_hctx->driver_data;
+       ublk_put_req_ref(ubq, req);
+
+       return ret;
+}
+
 static const struct file_operations ublk_ch_fops = {
        .owner = THIS_MODULE,
        .open = ublk_ch_open,
        .release = ublk_ch_release,
        .llseek = no_llseek,
+       .read_iter = ublk_ch_read_iter,
+       .write_iter = ublk_ch_write_iter,
        .uring_cmd = ublk_ch_uring_cmd,
        .mmap = ublk_ch_mmap,
 };
@@ -1547,7 +1762,7 @@ static int ublk_add_chdev(struct ublk_device *ub)
 
        dev->parent = ublk_misc.this_device;
        dev->devt = MKDEV(MAJOR(ublk_chr_devt), minor);
-       dev->class = ublk_chr_class;
+       dev->class = &ublk_chr_class;
        dev->release = ublk_cdev_rel;
        device_initialize(dev);
 
@@ -1632,7 +1847,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
        if (ublksrv_pid <= 0)
                return -EINVAL;
 
-       wait_for_completion_interruptible(&ub->completion);
+       if (wait_for_completion_interruptible(&ub->completion) != 0)
+               return -EINTR;
 
        schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
 
@@ -1818,10 +2034,12 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
         */
        ub->dev_info.flags &= UBLK_F_ALL;
 
-       if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
-               ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+       ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
+               UBLK_F_URING_CMD_COMP_IN_TASK;
 
-       ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE;
+       /* GET_DATA isn't needed any more with USER_COPY */
+       if (ub->dev_info.flags & UBLK_F_USER_COPY)
+               ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
 
        /* We are not ready to support zero copy */
        ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
@@ -1908,8 +2126,8 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
         * - the device number is freed already, we will not find this
         *   device via ublk_get_device_from_id()
         */
-       wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx));
-
+       if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
+               return -EINTR;
        return 0;
 }
 
@@ -2106,7 +2324,9 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
        pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
                        __func__, ub->dev_info.nr_hw_queues, header->dev_id);
        /* wait until new ubq_daemon sending all FETCH_REQ */
-       wait_for_completion_interruptible(&ub->completion);
+       if (wait_for_completion_interruptible(&ub->completion))
+               return -EINTR;
+
        pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
                        __func__, ub->dev_info.nr_hw_queues, header->dev_id);
 
@@ -2133,6 +2353,21 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
        return ret;
 }
 
+static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
+{
+       const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
+       void __user *argp = (void __user *)(unsigned long)header->addr;
+       u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
+
+       if (header->len != UBLK_FEATURES_LEN || !header->addr)
+               return -EINVAL;
+
+       if (copy_to_user(argp, &features, UBLK_FEATURES_LEN))
+               return -EFAULT;
+
+       return 0;
+}
+
 /*
  * All control commands are sent via /dev/ublk-control, so we have to check
  * the destination device's permission
@@ -2213,6 +2448,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
        case UBLK_CMD_GET_DEV_INFO2:
        case UBLK_CMD_GET_QUEUE_AFFINITY:
        case UBLK_CMD_GET_PARAMS:
+       case (_IOC_NR(UBLK_U_CMD_GET_FEATURES)):
                mask = MAY_READ;
                break;
        case UBLK_CMD_START_DEV:
@@ -2262,6 +2498,11 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
        if (ret)
                goto out;
 
+       if (cmd_op == UBLK_U_CMD_GET_FEATURES) {
+               ret = ublk_ctrl_get_features(cmd);
+               goto out;
+       }
+
        if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) {
                ret = -ENODEV;
                ub = ublk_get_device_from_id(header->dev_id);
@@ -2337,6 +2578,9 @@ static int __init ublk_init(void)
 {
        int ret;
 
+       BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
+                       UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+
        init_waitqueue_head(&ublk_idr_wq);
 
        ret = misc_register(&ublk_misc);
@@ -2347,11 +2591,10 @@ static int __init ublk_init(void)
        if (ret)
                goto unregister_mis;
 
-       ublk_chr_class = class_create("ublk-char");
-       if (IS_ERR(ublk_chr_class)) {
-               ret = PTR_ERR(ublk_chr_class);
+       ret = class_register(&ublk_chr_class);
+       if (ret)
                goto free_chrdev_region;
-       }
+
        return 0;
 
 free_chrdev_region:
@@ -2369,7 +2612,7 @@ static void __exit ublk_exit(void)
        idr_for_each_entry(&ublk_index_idr, ub, id)
                ublk_remove(ub);
 
-       class_destroy(ublk_chr_class);
+       class_unregister(&ublk_chr_class);
        misc_deregister(&ublk_misc);
 
        idr_destroy(&ublk_index_idr);
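
For reference, a minimal userspace sketch (not part of the patch above) of how a ublk server is expected to drive the new UBLK_F_USER_COPY interface: instead of passing a buffer address in the FETCH/COMMIT commands and letting the driver copy it, the server moves the payload itself with pread()/pwrite() on the per-device char node (/dev/ublkcN), at a file offset that encodes queue id, tag and byte offset as the inverse of ublk_pos_to_hwq()/ublk_pos_to_tag()/ublk_pos_to_buf_off() in the diff. The helper names below are hypothetical, and the UBLK_* offset macros are assumed to be exported by the matching <linux/ublk_cmd.h>.

/*
 * Hypothetical userspace helpers; assumes UBLKSRV_IO_BUF_OFFSET,
 * UBLK_QID_OFF and UBLK_TAG_OFF come from <linux/ublk_cmd.h> of the
 * same kernel.
 */
#define _FILE_OFFSET_BITS 64	/* user-copy offsets may not fit in a 32-bit off_t */
#include <stdint.h>
#include <unistd.h>
#include <linux/ublk_cmd.h>

/* Encode (queue id, tag, byte offset) into the char-device file offset. */
static inline uint64_t ublk_user_copy_pos(uint16_t q_id, uint16_t tag,
					  uint32_t buf_off)
{
	return UBLKSRV_IO_BUF_OFFSET +
		((uint64_t)q_id << UBLK_QID_OFF) +
		((uint64_t)tag << UBLK_TAG_OFF) + buf_off;
}

/*
 * REQ_OP_READ: the server has produced 'len' bytes and writes them into
 * the request pages (the driver accepts ITER_SOURCE in ublk_check_ubuf_dir()).
 */
static ssize_t ublk_fill_read_req(int cdev_fd, uint16_t q_id, uint16_t tag,
				  const void *buf, size_t len)
{
	return pwrite(cdev_fd, buf, len,
		      (off_t)ublk_user_copy_pos(q_id, tag, 0));
}

/*
 * REQ_OP_WRITE: the server fetches the payload out of the request pages
 * before handling it (the driver accepts ITER_DEST).
 */
static ssize_t ublk_fetch_write_req(int cdev_fd, uint16_t q_id, uint16_t tag,
				    void *buf, size_t len)
{
	return pread(cdev_fd, buf, len,
		     (off_t)ublk_user_copy_pos(q_id, tag, 0));
}

The per-request kref introduced above keeps the request alive while such a copy is in flight, and __ublk_check_and_get_req() rejects offsets that point at requests which are not started, carry no data, or lie beyond the request size.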