#include <asm/page.h>
#include <linux/task_work.h>
#include <linux/namei.h>
+#include <linux/kref.h>
#include <uapi/linux/ublk_cmd.h>
#define UBLK_MINORS (1U << MINORBITS)
| UBLK_F_NEED_GET_DATA \
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
- | UBLK_F_UNPRIVILEGED_DEV)
+ | UBLK_F_UNPRIVILEGED_DEV \
+ | UBLK_F_CMD_IOCTL_ENCODE)
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
struct ublk_rq_data {
struct llist_node node;
- struct callback_head work;
+
+ struct kref ref;
};
struct ublk_uring_cmd_pdu {
unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz;
bool force_abort;
+ bool timeout;
unsigned short nr_io_ready; /* how many ios setup */
struct ublk_device *dev;
struct ublk_io ios[];
__u32 types;
};
+static inline void __ublk_complete_rq(struct request *req);
+static void ublk_complete_rq(struct kref *ref);
+
static dev_t ublk_chr_devt;
static struct class *ublk_chr_class;
if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
const struct ublk_param_basic *p = &ub->params.basic;
- if (p->logical_bs_shift > PAGE_SHIFT)
+ if (p->logical_bs_shift > PAGE_SHIFT || p->logical_bs_shift < 9)
return -EINVAL;
if (p->logical_bs_shift > p->physical_bs_shift)
return 0;
}
-static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
+static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
{
- if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
- !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
- return true;
return false;
}
+static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_init(&data->ref);
+ }
+}
+
+static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ return kref_get_unless_zero(&data->ref);
+ }
+
+ return true;
+}
+
+static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_put(&data->ref, ublk_complete_rq);
+ } else {
+ __ublk_complete_rq(req);
+ }
+}
+
static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
{
- if (ubq->flags & UBLK_F_NEED_GET_DATA)
- return true;
- return false;
+ return ubq->flags & UBLK_F_NEED_GET_DATA;
}
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
static inline bool ublk_queue_can_use_recovery_reissue(
struct ublk_queue *ubq)
{
- if ((ubq->flags & UBLK_F_USER_RECOVERY) &&
- (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE))
- return true;
- return false;
+ return (ubq->flags & UBLK_F_USER_RECOVERY) &&
+ (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
}
static inline bool ublk_queue_can_use_recovery(
struct ublk_queue *ubq)
{
- if (ubq->flags & UBLK_F_USER_RECOVERY)
- return true;
- return false;
+ return ubq->flags & UBLK_F_USER_RECOVERY;
}
static inline bool ublk_can_use_recovery(struct ublk_device *ub)
{
- if (ub->dev_info.flags & UBLK_F_USER_RECOVERY)
- return true;
- return false;
+ return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
}
static void ublk_free_disk(struct gendisk *disk)
#define UBLK_MAX_PIN_PAGES 32
-struct ublk_map_data {
- const struct ublk_queue *ubq;
- const struct request *rq;
- const struct ublk_io *io;
- unsigned max_bytes;
-};
-
struct ublk_io_iter {
struct page *pages[UBLK_MAX_PIN_PAGES];
- unsigned pg_off; /* offset in the 1st page in pages */
- int nr_pages; /* how many page pointers in pages */
struct bio *bio;
struct bvec_iter iter;
};
-static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
- unsigned max_bytes, bool to_vm)
+/* return how many pages are copied */
+static void ublk_copy_io_pages(struct ublk_io_iter *data,
+ size_t total, size_t pg_off, int dir)
{
- const unsigned total = min_t(unsigned, max_bytes,
- PAGE_SIZE - data->pg_off +
- ((data->nr_pages - 1) << PAGE_SHIFT));
unsigned done = 0;
unsigned pg_idx = 0;
while (done < total) {
struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- const unsigned int bytes = min3(bv.bv_len, total - done,
- (unsigned)(PAGE_SIZE - data->pg_off));
+ unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
+ (unsigned)(PAGE_SIZE - pg_off));
void *bv_buf = bvec_kmap_local(&bv);
void *pg_buf = kmap_local_page(data->pages[pg_idx]);
- if (to_vm)
- memcpy(pg_buf + data->pg_off, bv_buf, bytes);
+ if (dir == ITER_DEST)
+ memcpy(pg_buf + pg_off, bv_buf, bytes);
else
- memcpy(bv_buf, pg_buf + data->pg_off, bytes);
+ memcpy(bv_buf, pg_buf + pg_off, bytes);
kunmap_local(pg_buf);
kunmap_local(bv_buf);
/* advance page array */
- data->pg_off += bytes;
- if (data->pg_off == PAGE_SIZE) {
+ pg_off += bytes;
+ if (pg_off == PAGE_SIZE) {
pg_idx += 1;
- data->pg_off = 0;
+ pg_off = 0;
}
done += bytes;
data->iter = data->bio->bi_iter;
}
}
-
- return done;
}
-static inline int ublk_copy_user_pages(struct ublk_map_data *data,
- bool to_vm)
+/*
+ * Copy data between request pages and io_iter, and 'offset'
+ * is the start point of linear offset of request.
+ */
+static size_t ublk_copy_user_pages(const struct request *req,
+ struct iov_iter *uiter, int dir)
{
- const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
- const unsigned long start_vm = data->io->addr;
- unsigned int done = 0;
struct ublk_io_iter iter = {
- .pg_off = start_vm & (PAGE_SIZE - 1),
- .bio = data->rq->bio,
- .iter = data->rq->bio->bi_iter,
+ .bio = req->bio,
+ .iter = req->bio->bi_iter,
};
- const unsigned int nr_pages = round_up(data->max_bytes +
- (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
-
- while (done < nr_pages) {
- const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES,
- nr_pages - done);
- unsigned i, len;
-
- iter.nr_pages = get_user_pages_fast(start_vm +
- (done << PAGE_SHIFT), to_pin, gup_flags,
- iter.pages);
- if (iter.nr_pages <= 0)
- return done == 0 ? iter.nr_pages : done;
- len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm);
- for (i = 0; i < iter.nr_pages; i++) {
- if (to_vm)
+ size_t done = 0;
+
+ while (iov_iter_count(uiter) && iter.bio) {
+ unsigned nr_pages;
+ size_t len, off;
+ int i;
+
+ len = iov_iter_get_pages2(uiter, iter.pages,
+ iov_iter_count(uiter),
+ UBLK_MAX_PIN_PAGES, &off);
+ if (len <= 0)
+ return done;
+
+ ublk_copy_io_pages(&iter, len, off, dir);
+ nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
+ for (i = 0; i < nr_pages; i++) {
+ if (dir == ITER_DEST)
set_page_dirty(iter.pages[i]);
put_page(iter.pages[i]);
}
- data->max_bytes -= len;
- done += iter.nr_pages;
+ done += len;
}
return done;
}
+static inline bool ublk_need_map_req(const struct request *req)
+{
+ return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
+}
+
+static inline bool ublk_need_unmap_req(const struct request *req)
+{
+ return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ;
+}
+
static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
struct ublk_io *io)
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+
/*
* no zero copy, we delay copy WRITE request data into ublksrv
* context and the big benefit is that pinning pages in current
* context is pretty fast, see ublk_pin_user_pages
*/
- if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH)
- return rq_bytes;
+ if (ublk_need_map_req(req)) {
+ struct iov_iter iter;
+ struct iovec iov;
+ const int dir = ITER_DEST;
- if (ublk_rq_has_data(req)) {
- struct ublk_map_data data = {
- .ubq = ubq,
- .rq = req,
- .io = io,
- .max_bytes = rq_bytes,
- };
+ import_single_range(dir, u64_to_user_ptr(io->addr), rq_bytes,
+ &iov, &iter);
- ublk_copy_user_pages(&data, true);
-
- return rq_bytes - data.max_bytes;
+ return ublk_copy_user_pages(req, &iter, dir);
}
return rq_bytes;
}
{
const unsigned int rq_bytes = blk_rq_bytes(req);
- if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
- struct ublk_map_data data = {
- .ubq = ubq,
- .rq = req,
- .io = io,
- .max_bytes = io->res,
- };
+ if (ublk_need_unmap_req(req)) {
+ struct iov_iter iter;
+ struct iovec iov;
+ const int dir = ITER_SOURCE;
WARN_ON_ONCE(io->res > rq_bytes);
- ublk_copy_user_pages(&data, false);
-
- return io->res - data.max_bytes;
+ import_single_range(dir, u64_to_user_ptr(io->addr), io->res,
+ &iov, &iter);
+ return ublk_copy_user_pages(req, &iter, dir);
}
return rq_bytes;
}
}
/* todo: handle partial completion */
-static void ublk_complete_rq(struct request *req)
+static inline void __ublk_complete_rq(struct request *req)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
struct ublk_io *io = &ubq->ios[req->tag];
unsigned int unmapped_bytes;
+ blk_status_t res = BLK_STS_OK;
+
+ /* called from ublk_abort_queue() code path */
+ if (io->flags & UBLK_IO_FLAG_ABORTED) {
+ res = BLK_STS_IOERR;
+ goto exit;
+ }
/* failed read IO if nothing is read */
if (!io->res && req_op(req) == REQ_OP_READ)
io->res = -EIO;
if (io->res < 0) {
- blk_mq_end_request(req, errno_to_blk_status(io->res));
- return;
+ res = errno_to_blk_status(io->res);
+ goto exit;
}
/*
*
* Both the two needn't unmap.
*/
- if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
- blk_mq_end_request(req, BLK_STS_OK);
- return;
- }
+ if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE)
+ goto exit;
/* for READ request, writing data in iod->addr to rq buffers */
unmapped_bytes = ublk_unmap_io(ubq, req, io);
blk_mq_requeue_request(req, true);
else
__blk_mq_end_request(req, BLK_STS_OK);
+
+ return;
+exit:
+ blk_mq_end_request(req, res);
+}
+
+static void ublk_complete_rq(struct kref *ref)
+{
+ struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+ ref);
+ struct request *req = blk_mq_rq_from_pdu(data);
+
+ __ublk_complete_rq(req);
}
/*
if (ublk_queue_can_use_recovery_reissue(ubq))
blk_mq_requeue_request(req, false);
else
- blk_mq_end_request(req, BLK_STS_IOERR);
+ ublk_put_req_ref(ubq, req);
}
}
-static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+static void ubq_complete_io_cmd(struct ublk_io *io, int res,
+ unsigned issue_flags)
{
/* mark this cmd owned by ublksrv */
io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
io->flags &= ~UBLK_IO_FLAG_ACTIVE;
/* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, res, 0);
+ io_uring_cmd_done(io->cmd, res, 0, issue_flags);
}
#define UBLK_REQUEUE_DELAY_MS 3
mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
}
-static inline void __ublk_rq_task_work(struct request *req)
+static inline void __ublk_rq_task_work(struct request *req,
+ unsigned issue_flags)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
int tag = req->tag;
return;
}
- if (ublk_need_get_data(ubq) &&
- (req_op(req) == REQ_OP_WRITE ||
- req_op(req) == REQ_OP_FLUSH)) {
+ if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) {
/*
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
* so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
__func__, io->cmd->cmd_op, ubq->q_id,
req->tag, io->flags);
- ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags);
return;
}
/*
mapped_bytes >> 9;
}
- ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
+ ublk_init_req_ref(ubq, req);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
}
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq)
+static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
+ unsigned issue_flags)
{
struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
struct ublk_rq_data *data, *tmp;
io_cmds = llist_reverse_order(io_cmds);
llist_for_each_entry_safe(data, tmp, io_cmds, node)
- __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
+ __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
}
static inline void ublk_abort_io_cmds(struct ublk_queue *ubq)
__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
}
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
+static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
{
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
- ublk_forward_io_cmds(ubq);
-}
-
-static void ublk_rq_task_work_fn(struct callback_head *work)
-{
- struct ublk_rq_data *data = container_of(work,
- struct ublk_rq_data, work);
- struct request *req = blk_mq_rq_from_pdu(data);
- struct ublk_queue *ubq = req->mq_hctx->driver_data;
-
- ublk_forward_io_cmds(ubq);
+ ublk_forward_io_cmds(ubq, issue_flags);
}
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
*/
if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
ublk_abort_io_cmds(ubq);
- } else if (ublk_can_use_task_work(ubq)) {
- if (task_work_add(ubq->ubq_daemon, &data->work,
- TWA_SIGNAL_NO_IPI))
- ublk_abort_io_cmds(ubq);
} else {
struct io_uring_cmd *cmd = io->cmd;
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
}
}
+static enum blk_eh_timer_return ublk_timeout(struct request *rq)
+{
+ struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+
+ if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
+ if (!ubq->timeout) {
+ send_sig(SIGKILL, ubq->ubq_daemon, 0);
+ ubq->timeout = true;
+ }
+
+ return BLK_EH_DONE;
+ }
+
+ return BLK_EH_RESET_TIMER;
+}
+
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
return 0;
}
-static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
- init_task_work(&data->work, ublk_rq_task_work_fn);
- return 0;
-}
-
static const struct blk_mq_ops ublk_mq_ops = {
.queue_rq = ublk_queue_rq,
.init_hctx = ublk_init_hctx,
- .init_request = ublk_init_rq,
+ .timeout = ublk_timeout,
};
static int ublk_ch_open(struct inode *inode, struct file *filp)
}
static void ublk_commit_completion(struct ublk_device *ub,
- struct ublksrv_io_cmd *ub_cmd)
+ const struct ublksrv_io_cmd *ub_cmd)
{
u32 qid = ub_cmd->q_id, tag = ub_cmd->tag;
struct ublk_queue *ubq = ublk_get_queue(ub, qid);
req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
if (req && likely(!blk_should_fake_timeout(req->q)))
- ublk_complete_rq(req);
+ ublk_put_req_ref(ubq, req);
}
/*
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_ACTIVE)
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
+ io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0,
+ IO_URING_F_UNLOCKED);
}
/* all io commands are canceled */
ublk_queue_cmd(ubq, req);
}
-static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+static inline int ublk_check_cmd_op(u32 cmd_op)
+{
+ u32 ioc_type = _IOC_TYPE(cmd_op);
+
+ if (!IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u')
+ return -EOPNOTSUPP;
+
+ if (ioc_type != 'u' && ioc_type != 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static inline void ublk_fill_io_cmd(struct ublk_io *io,
+ struct io_uring_cmd *cmd, unsigned long buf_addr)
+{
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ io->addr = buf_addr;
+}
+
+static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
+ unsigned int issue_flags,
+ const struct ublksrv_io_cmd *ub_cmd)
{
- struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
struct ublk_device *ub = cmd->file->private_data;
struct ublk_queue *ubq;
struct ublk_io *io;
* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
*/
if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
- ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+ ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
goto out;
- switch (cmd_op) {
+ ret = ublk_check_cmd_op(cmd_op);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ switch (_IOC_NR(cmd_op)) {
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
if (ublk_queue_ready(ubq)) {
/* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
if (!ub_cmd->addr && !ublk_need_get_data(ubq))
goto out;
- io->cmd = cmd;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
- io->addr = ub_cmd->addr;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_mark_io_ready(ub, ubq);
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
goto out;
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
- io->addr = ub_cmd->addr;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
- io->cmd = cmd;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_commit_completion(ub, ub_cmd);
break;
case UBLK_IO_NEED_GET_DATA:
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
- io->addr = ub_cmd->addr;
- io->cmd = cmd;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break;
default:
return -EIOCBQUEUED;
out:
- io_uring_cmd_done(cmd, ret, 0);
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
__func__, cmd_op, tag, ret, io->flags);
return -EIOCBQUEUED;
}
+static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ /*
+ * Not necessary for async retry, but let's keep it simple and always
+ * copy the values to avoid any potential reuse.
+ */
+ const struct ublksrv_io_cmd *ub_src = io_uring_sqe_cmd(cmd->sqe);
+ const struct ublksrv_io_cmd ub_cmd = {
+ .q_id = READ_ONCE(ub_src->q_id),
+ .tag = READ_ONCE(ub_src->tag),
+ .result = READ_ONCE(ub_src->result),
+ .addr = READ_ONCE(ub_src->addr)
+ };
+
+ return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
+}
+
static const struct file_operations ublk_ch_fops = {
.owner = THIS_MODULE,
.open = ublk_ch_open,
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
struct gendisk *disk;
int ret = -EINVAL;
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
get_device(&ub->cdev_dev);
+ ub->dev_info.state = UBLK_S_DEV_LIVE;
ret = add_disk(disk);
if (ret) {
/*
* Has to drop the reference since ->free_disk won't be
* called in case of add_disk failure.
*/
+ ub->dev_info.state = UBLK_S_DEV_DEAD;
ublk_put_device(ub);
goto out_put_disk;
}
set_bit(UB_STATE_USED, &ub->state);
- ub->dev_info.state = UBLK_S_DEV_LIVE;
out_put_disk:
if (ret)
put_disk(disk);
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
cpumask_var_t cpumask;
unsigned long queue;
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublksrv_ctrl_dev_info info;
struct ublk_device *ub;
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
return -EPERM;
+ /*
+ * unprivileged device can't be trusted, but RECOVERY and
+ * RECOVERY_REISSUE still may hang error handling, so can't
+ * support recovery features for unprivileged ublk now
+ *
+ * TODO: provide forward progress for RECOVERY handler, so that
+ * unprivileged device can benefit from it
+ */
+ if (info.flags & UBLK_F_UNPRIVILEGED_DEV)
+ info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE |
+ UBLK_F_USER_RECOVERY);
+
/* the created device is always owned by current user */
ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
*/
ub->dev_info.flags &= UBLK_F_ALL;
- if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
- ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+ ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
+ UBLK_F_URING_CMD_COMP_IN_TASK;
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n",
__func__, cmd->cmd_op, header->dev_id, header->queue_id,
static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
static int ublk_ctrl_get_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret;
static int ublk_ctrl_set_params(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
struct ublk_params_header ph;
int ret = -EFAULT;
/* clear all we don't support yet */
ub->params.types &= UBLK_PARAM_TYPE_ALL;
ret = ublk_validate_params(ub);
+ if (ret)
+ ub->params.types = 0;
}
mutex_unlock(&ub->mutex);
put_task_struct(ubq->ubq_daemon);
/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
ubq->ubq_daemon = NULL;
+ ubq->timeout = false;
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ret = -EINVAL;
int i;
static int ublk_ctrl_end_recovery(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
int ublksrv_pid = (int)header->data[0];
int ret = -EINVAL;
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
struct io_uring_cmd *cmd)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)io_uring_sqe_cmd(cmd->sqe);
bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
void __user *argp = (void __user *)(unsigned long)header->addr;
char *dev_path = NULL;
* know if the specified device is created as unprivileged
* mode.
*/
- if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2)
+ if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2)
return 0;
}
dev_path[header->dev_path_len] = 0;
ret = -EINVAL;
- switch (cmd->cmd_op) {
+ switch (_IOC_NR(cmd->cmd_op)) {
case UBLK_CMD_GET_DEV_INFO:
case UBLK_CMD_GET_DEV_INFO2:
case UBLK_CMD_GET_QUEUE_AFFINITY:
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
- struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
struct ublk_device *ub = NULL;
+ u32 cmd_op = cmd->cmd_op;
int ret = -EINVAL;
if (issue_flags & IO_URING_F_NONBLOCK)
if (!(issue_flags & IO_URING_F_SQE128))
goto out;
- if (cmd->cmd_op != UBLK_CMD_ADD_DEV) {
+ ret = ublk_check_cmd_op(cmd_op);
+ if (ret)
+ goto out;
+
+ if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) {
ret = -ENODEV;
ub = ublk_get_device_from_id(header->dev_id);
if (!ub)
goto out;
ret = ublk_ctrl_uring_cmd_permission(ub, cmd);
- } else {
- /* ADD_DEV permission check is done in command handler */
- ret = 0;
+ if (ret)
+ goto put_dev;
}
- if (ret)
- goto put_dev;
-
- switch (cmd->cmd_op) {
+ switch (_IOC_NR(cmd_op)) {
case UBLK_CMD_START_DEV:
ret = ublk_ctrl_start_dev(ub, cmd);
break;
if (ub)
ublk_put_device(ub);
out:
- io_uring_cmd_done(cmd, ret, 0);
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
return -EIOCBQUEUED;
if (ret)
goto unregister_mis;
- ublk_chr_class = class_create(THIS_MODULE, "ublk-char");
+ ublk_chr_class = class_create("ublk-char");
if (IS_ERR(ublk_chr_class)) {
ret = PTR_ERR(ublk_chr_class);
goto free_chrdev_region;