nvme: fine-granular CAP_SYS_ADMIN for nvme io commands
authorKanchan Joshi <joshi.k@samsung.com>
Mon, 31 Oct 2022 16:23:50 +0000 (21:53 +0530)
committerChristoph Hellwig <hch@lst.de>
Tue, 15 Nov 2022 09:50:30 +0000 (10:50 +0100)
Currently both io and admin commands are kept under a
coarse-granular CAP_SYS_ADMIN check, disregarding file mode completely.

$ ls -l /dev/ng*
crw-rw-rw- 1 root root 242, 0 Sep  9 19:20 /dev/ng0n1
crw------- 1 root root 242, 1 Sep  9 19:20 /dev/ng0n2

In the example above, ng0n1 appears as if it may allow unprivileged
read/write operation but it does not and behaves same as ng0n2.

This patch implements a shift from CAP_SYS_ADMIN to more fine-granular
control for io-commands.
If CAP_SYS_ADMIN is present, nothing else is checked as before.
Otherwise, following rules are in place
- any admin-cmd is not allowed
- vendor-specific and fabric commmand are not allowed
- io-commands that can write are allowed if matching FMODE_WRITE
permission is present
- io-commands that read are allowed

Add a helper nvme_cmd_allowed that implements above policy.
Change all the callers of CAP_SYS_ADMIN to go through nvme_cmd_allowed
for any decision making.
Since file open mode is counted for any approval/denial, change at
various places to keep file-mode information handy.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/host/ioctl.c
include/linux/nvme.h

index 81f5550b670da05c55d961fed84368c858e0325d..1d68f161064ae188e830985a8c9b14b91e7c57af 100644 (file)
@@ -8,6 +8,34 @@
 #include <linux/io_uring.h>
 #include "nvme.h"
 
+static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
+               fmode_t mode)
+{
+       if (capable(CAP_SYS_ADMIN))
+               return true;
+
+       /*
+        * Do not allow unprivileged processes to send vendor specific or fabrics
+        * commands as we can't be sure about their effects.
+        */
+       if (c->common.opcode >= nvme_cmd_vendor_start ||
+           c->common.opcode == nvme_fabrics_command)
+               return false;
+
+       /* do not allow unprivileged admin commands */
+       if (!ns)
+               return false;
+
+       /*
+        * Only allow I/O commands that transfer data to the controller if the
+        * special file is open for writing, but always allow I/O commands that
+        * transfer data from the controller.
+        */
+       if (nvme_is_write(c))
+               return mode & FMODE_WRITE;
+       return true;
+}
+
 /*
  * Convert integer values from ioctl structures to user pointers, silently
  * ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -261,7 +289,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
 }
 
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-                       struct nvme_passthru_cmd __user *ucmd)
+                       struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
 {
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
@@ -269,8 +297,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        u64 result;
        int status;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
@@ -291,6 +317,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);
 
+       if (!nvme_cmd_allowed(ns, &c, mode))
+               return -EACCES;
+
        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);
 
@@ -308,15 +337,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 }
 
 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-                       struct nvme_passthru_cmd64 __user *ucmd, bool vec)
+                       struct nvme_passthru_cmd64 __user *ucmd, bool vec,
+                       fmode_t mode)
 {
        struct nvme_passthru_cmd64 cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        int status;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;
        if (cmd.flags)
@@ -337,6 +365,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        c.common.cdw14 = cpu_to_le32(cmd.cdw14);
        c.common.cdw15 = cpu_to_le32(cmd.cdw15);
 
+       if (!nvme_cmd_allowed(ns, &c, mode))
+               return -EACCES;
+
        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);
 
@@ -483,9 +514,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        void *meta = NULL;
        int ret;
 
-       if (!capable(CAP_SYS_ADMIN))
-               return -EACCES;
-
        c.common.opcode = READ_ONCE(cmd->opcode);
        c.common.flags = READ_ONCE(cmd->flags);
        if (c.common.flags)
@@ -507,6 +535,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
        c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
 
+       if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
+               return -EACCES;
+
        d.metadata = READ_ONCE(cmd->metadata);
        d.addr = READ_ONCE(cmd->addr);
        d.data_len = READ_ONCE(cmd->data_len);
@@ -570,13 +601,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
 }
 
 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
-               void __user *argp)
+               void __user *argp, fmode_t mode)
 {
        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
-               return nvme_user_cmd(ctrl, NULL, argp);
+               return nvme_user_cmd(ctrl, NULL, argp, mode);
        case NVME_IOCTL_ADMIN64_CMD:
-               return nvme_user_cmd64(ctrl, NULL, argp, false);
+               return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
        default:
                return sed_ioctl(ctrl->opal_dev, cmd, argp);
        }
@@ -601,14 +632,14 @@ struct nvme_user_io32 {
 #endif /* COMPAT_FOR_U64_ALIGNMENT */
 
 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
-               void __user *argp)
+               void __user *argp, fmode_t mode)
 {
        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
                return ns->head->ns_id;
        case NVME_IOCTL_IO_CMD:
-               return nvme_user_cmd(ns->ctrl, ns, argp);
+               return nvme_user_cmd(ns->ctrl, ns, argp, mode);
        /*
         * struct nvme_user_io can have different padding on some 32-bit ABIs.
         * Just accept the compat version as all fields that are used are the
@@ -620,19 +651,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, argp);
        case NVME_IOCTL_IO64_CMD:
-               return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+               return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
        case NVME_IOCTL_IO64_CMD_VEC:
-               return nvme_user_cmd64(ns->ctrl, ns, argp, true);
+               return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
        default:
                return -ENOTTY;
        }
 }
 
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
+static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
+                       fmode_t mode)
 {
-       if (is_ctrl_ioctl(cmd))
-               return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
-       return nvme_ns_ioctl(ns, cmd, arg);
+       if (is_ctrl_ioctl(cmd))
+               return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
+       return nvme_ns_ioctl(ns, cmd, arg, mode);
 }
 
 int nvme_ioctl(struct block_device *bdev, fmode_t mode,
@@ -640,7 +672,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 {
        struct nvme_ns *ns = bdev->bd_disk->private_data;
 
-       return __nvme_ioctl(ns, cmd, (void __user *)arg);
+       return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
 }
 
 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -648,7 +680,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct nvme_ns *ns =
                container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
 
-       return __nvme_ioctl(ns, cmd, (void __user *)arg);
+       return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
 }
 
 static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -716,7 +748,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 }
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
-               void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+               void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+               fmode_t mode)
        __releases(&head->srcu)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
@@ -724,7 +757,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 
        nvme_get_ctrl(ns->ctrl);
        srcu_read_unlock(&head->srcu, srcu_idx);
-       ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+       ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
 
        nvme_put_ctrl(ctrl);
        return ret;
@@ -749,9 +782,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
         * deadlock when deleting namespaces using the passthrough interface.
         */
        if (is_ctrl_ioctl(cmd))
-               return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+               return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+                                       mode);
 
-       ret = nvme_ns_ioctl(ns, cmd, argp);
+       ret = nvme_ns_ioctl(ns, cmd, argp, mode);
 out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
@@ -773,9 +807,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
                goto out_unlock;
 
        if (is_ctrl_ioctl(cmd))
-               return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+               return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+                               file->f_mode);
 
-       ret = nvme_ns_ioctl(ns, cmd, argp);
+       ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
 out_unlock:
        srcu_read_unlock(&head->srcu, srcu_idx);
        return ret;
@@ -849,7 +884,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
        return ret;
 }
 
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+               fmode_t mode)
 {
        struct nvme_ns *ns;
        int ret;
@@ -873,7 +909,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
        kref_get(&ns->kref);
        up_read(&ctrl->namespaces_rwsem);
 
-       ret = nvme_user_cmd(ctrl, ns, argp);
+       ret = nvme_user_cmd(ctrl, ns, argp, mode);
        nvme_put_ns(ns);
        return ret;
 
@@ -890,11 +926,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 
        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
-               return nvme_user_cmd(ctrl, NULL, argp);
+               return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
        case NVME_IOCTL_ADMIN64_CMD:
-               return nvme_user_cmd64(ctrl, NULL, argp, false);
+               return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
        case NVME_IOCTL_IO_CMD:
-               return nvme_dev_user_cmd(ctrl, argp);
+               return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
        case NVME_IOCTL_RESET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
index 050d7d0cd81b0c8163cfcd6a23eaf414dc26f43d..1d102b662e887f7327c8e4b23817ca72c16cb564 100644 (file)
@@ -797,6 +797,7 @@ enum nvme_opcode {
        nvme_cmd_zone_mgmt_send = 0x79,
        nvme_cmd_zone_mgmt_recv = 0x7a,
        nvme_cmd_zone_append    = 0x7d,
+       nvme_cmd_vendor_start   = 0x80,
 };
 
 #define nvme_opcode_name(opcode)       { opcode, #opcode }