btrfs: add io_uring interface for encoded writes
authorMark Harmstone <maharmstone@fb.com>
Fri, 10 Jan 2025 17:23:52 +0000 (17:23 +0000)
committerDavid Sterba <dsterba@suse.com>
Mon, 13 Jan 2025 20:06:31 +0000 (21:06 +0100)
Add an io_uring interface for encoded writes, with the same parameters
as the BTRFS_IOC_ENCODED_WRITE ioctl.

As with the encoded reads code, there's a test program for this at
https://github.com/maharmstone/io_uring-encoded, and I'll get this
worked into an fstest.

How io_uring works is that it initially calls btrfs_uring_cmd with the
IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
a kthread with the flag cleared.

Ideally we'd honour this and call try_lock etc., but there's still a lot
of work to be done to create non-blocking versions of all the functions
in our write path. Instead, just validate the input in
btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
view to properly optimizing the optimistic path later on.

Signed-off-by: Mark Harmstone <maharmstone@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ioctl.c

index 69c0444369b7410f198fd953b2665586bfe945ca..ae98269a5e3af1a6c8553a421e6fcd4668574404 100644 (file)
@@ -4936,6 +4936,128 @@ out_acct:
        return ret;
 }
 
+static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+       loff_t pos;
+       struct kiocb kiocb;
+       struct file *file;
+       ssize_t ret;
+       void __user *sqe_addr;
+       struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out_acct;
+       }
+
+       file = cmd->file;
+       sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
+
+       if (!(file->f_mode & FMODE_WRITE)) {
+               ret = -EBADF;
+               goto out_acct;
+       }
+
+       if (!data) {
+               data = kzalloc(sizeof(*data), GFP_NOFS);
+               if (!data) {
+                       ret = -ENOMEM;
+                       goto out_acct;
+               }
+
+               io_uring_cmd_get_async_data(cmd)->op_data = data;
+
+               if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+                       struct btrfs_ioctl_encoded_io_args_32 args32;
+
+                       if (copy_from_user(&args32, sqe_addr, sizeof(args32))) {
+                               ret = -EFAULT;
+                               goto out_acct;
+                       }
+                       data->args.iov = compat_ptr(args32.iov);
+                       data->args.iovcnt = args32.iovcnt;
+                       data->args.offset = args32.offset;
+                       data->args.flags = args32.flags;
+                       data->args.len = args32.len;
+                       data->args.unencoded_len = args32.unencoded_len;
+                       data->args.unencoded_offset = args32.unencoded_offset;
+                       data->args.compression = args32.compression;
+                       data->args.encryption = args32.encryption;
+                       memcpy(data->args.reserved, args32.reserved,
+                              sizeof(data->args.reserved));
+#else
+                       ret = -ENOTTY;
+                       goto out_acct;
+#endif
+               } else {
+                       if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) {
+                               ret = -EFAULT;
+                               goto out_acct;
+                       }
+               }
+
+               ret = -EINVAL;
+               if (data->args.flags != 0)
+                       goto out_acct;
+               if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved)))
+                       goto out_acct;
+               if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+                   data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+                       goto out_acct;
+               if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+                   data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+                       goto out_acct;
+               if (data->args.unencoded_offset > data->args.unencoded_len)
+                       goto out_acct;
+               if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset)
+                       goto out_acct;
+
+               data->iov = data->iovstack;
+               ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt,
+                                  ARRAY_SIZE(data->iovstack), &data->iov,
+                                  &data->iter);
+               if (ret < 0)
+                       goto out_acct;
+
+               if (iov_iter_count(&data->iter) == 0) {
+                       ret = 0;
+                       goto out_iov;
+               }
+       }
+
+       if (issue_flags & IO_URING_F_NONBLOCK) {
+               ret = -EAGAIN;
+               goto out_acct;
+       }
+
+       pos = data->args.offset;
+       ret = rw_verify_area(WRITE, file, &pos, data->args.len);
+       if (ret < 0)
+               goto out_iov;
+
+       init_sync_kiocb(&kiocb, file);
+       ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
+       if (ret)
+               goto out_iov;
+       kiocb.ki_pos = pos;
+
+       file_start_write(file);
+
+       ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args);
+       if (ret > 0)
+               fsnotify_modify(file);
+
+       file_end_write(file);
+out_iov:
+       kfree(data->iov);
+out_acct:
+       if (ret > 0)
+               add_wchar(current, ret);
+       inc_syscw(current);
+       return ret;
+}
+
 int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
        switch (cmd->cmd_op) {
@@ -4944,6 +5066,12 @@ int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
        case BTRFS_IOC_ENCODED_READ_32:
 #endif
                return btrfs_uring_encoded_read(cmd, issue_flags);
+
+       case BTRFS_IOC_ENCODED_WRITE:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+       case BTRFS_IOC_ENCODED_WRITE_32:
+#endif
+               return btrfs_uring_encoded_write(cmd, issue_flags);
        }
 
        return -EINVAL;