ublk: support user copy
authorMing Lei <ming.lei@redhat.com>
Fri, 19 May 2023 06:50:30 +0000 (14:50 +0800)
committerJens Axboe <axboe@kernel.dk>
Sat, 20 May 2023 01:59:17 +0000 (19:59 -0600)
Currently copy between io request buffer(pages) and userspace buffer is
done inside ublk_map_io() or ublk_unmap_io(). This way performs very
well in case of pre-allocated userspace io buffer.

For dynamically allocated or external userspace backend io buffer,
UBLK_F_NEED_GET_DATA is added for ublk server to provide buffer by one
extra command communication for WRITE request. For READ, userspace
simply provides buffer, but can't know when the buffer is done[1].

Add UBLK_F_USER_COPY by moving io data copy out of kernel by providing
read()/write() on /dev/ublkcN, and simply let ublk server do the io
data copy. This way makes both side cleaner, the cost is that one extra
syscall for copy io data between request and backend buffer.

With UBLK_F_USER_COPY, it actually becomes possible to run per-io zero
copy now, such as, only do zero copy for big size IO, so it can be
thought as one prep patch for supporting zero copy. Meantime zero copy
still needs to expose read()/write() buffer for some corner case, such
as passthrough IO.

[1] READ buffer in UBLK_F_NEED_GET_DATA
https://lore.kernel.org/linux-block/116d8a56-0881-56d3-9bcc-78ff3e1dc4e5@linux.alibaba.com/T/#m23bd4b8634c0a054e6797063167b469949a247bb

ublksrv loop usercopy code:

https://github.com/ming1/ubdsrv/commits/usercopy

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230519065030.351216-8-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/ublk_drv.c
include/uapi/linux/ublk_cmd.h

index ec40ac4f9af3c8ac9a487fa6668a653296cb40ff..e00733b6fea83d13f8b82b35a266ef7c7116ed5e 100644 (file)
@@ -55,7 +55,8 @@
                | UBLK_F_USER_RECOVERY \
                | UBLK_F_USER_RECOVERY_REISSUE \
                | UBLK_F_UNPRIVILEGED_DEV \
-               | UBLK_F_CMD_IOCTL_ENCODE)
+               | UBLK_F_CMD_IOCTL_ENCODE \
+               | UBLK_F_USER_COPY)
 
 /* All UBLK_PARAM_TYPE_* should be included here */
 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
@@ -312,9 +313,18 @@ static int ublk_apply_params(struct ublk_device *ub)
        return 0;
 }
 
+static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
+{
+       return ubq->flags & UBLK_F_USER_COPY;
+}
+
 static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
 {
-       return false;
+       /*
+        * read()/write() is involved in user copy, so request reference
+        * has to be grabbed
+        */
+       return ublk_support_user_copy(ubq);
 }
 
 static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
@@ -591,6 +601,9 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
 {
        const unsigned int rq_bytes = blk_rq_bytes(req);
 
+       if (ublk_support_user_copy(ubq))
+               return rq_bytes;
+
        /*
         * no zero copy, we delay copy WRITE request data into ublksrv
         * context and the big benefit is that pinning pages in current
@@ -615,6 +628,9 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
 {
        const unsigned int rq_bytes = blk_rq_bytes(req);
 
+       if (ublk_support_user_copy(ubq))
+               return rq_bytes;
+
        if (ublk_need_unmap_req(req)) {
                struct iov_iter iter;
                struct iovec iov;
@@ -1390,6 +1406,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                        ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
                goto out;
 
+       if (ublk_support_user_copy(ubq) && ub_cmd->addr) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        ret = ublk_check_cmd_op(cmd_op);
        if (ret)
                goto out;
@@ -1408,23 +1429,34 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
                 */
                if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
                        goto out;
-               /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
-               if (!ub_cmd->addr && !ublk_need_get_data(ubq))
-                       goto out;
+
+               if (!ublk_support_user_copy(ubq)) {
+                       /*
+                        * FETCH_RQ has to provide IO buffer if NEED GET
+                        * DATA is not enabled
+                        */
+                       if (!ub_cmd->addr && !ublk_need_get_data(ubq))
+                               goto out;
+               }
 
                ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
                ublk_mark_io_ready(ub, ubq);
                break;
        case UBLK_IO_COMMIT_AND_FETCH_REQ:
                req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
-               /*
-                * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
-                * not enabled or it is Read IO.
-                */
-               if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
-                       goto out;
+
                if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
                        goto out;
+
+               if (!ublk_support_user_copy(ubq)) {
+                       /*
+                        * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+                        * NEED GET DATA is not enabled or it is Read IO.
+                        */
+                       if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+                                               req_op(req) == REQ_OP_READ))
+                               goto out;
+               }
                ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
                ublk_commit_completion(ub, ub_cmd);
                break;
@@ -1996,6 +2028,10 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
        ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
                UBLK_F_URING_CMD_COMP_IN_TASK;
 
+       /* GET_DATA isn't needed any more with USER_COPY */
+       if (ub->dev_info.flags & UBLK_F_USER_COPY)
+               ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
+
        /* We are not ready to support zero copy */
        ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
 
index c0c1632c671e51de32674b94fe8a28ce3d6b7bb2..54b5b0aeefca0c3360efd2093f6417d363b94b37 100644 (file)
 /* use ioctl encoding for uring command */
 #define UBLK_F_CMD_IOCTL_ENCODE        (1UL << 6)
 
+/* Copy between request and user buffer by pread()/pwrite() */
+#define UBLK_F_USER_COPY       (1UL << 7)
+
 /* device state */
 #define UBLK_S_DEV_DEAD        0
 #define UBLK_S_DEV_LIVE        1