nvmet: add simple file backed ns support
author		Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
		Wed, 23 May 2018 04:34:39 +0000 (00:34 -0400)
committer	Christoph Hellwig <hch@lst.de>
		Fri, 25 May 2018 14:50:12 +0000 (16:50 +0200)
This patch adds simple file backed namespace support to the NVMeOF target.

The new file io-cmd-file.c handles I/O commands when the ns is file
backed.  We also introduce a mempool-based slow path that uses sync I/O
for file backed ns to ensure forward progress under memory reclaim.
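
The slow path works as follows: the per-request bio_vec array is
normally allocated with kmalloc_array(); if that fails under memory
pressure, the request falls back to a per-namespace mempool whose
objects hold at most NVMET_MAX_MPOOL_BVEC bio_vecs, and a request
larger than one object is then issued as sync I/O in object-sized
chunks.  A minimal userspace sketch of that policy (the names
alloc_bvecs, fake_bvec and MPOOL_BVEC are invented for illustration;
the driver itself uses kmalloc_array(), mempool_alloc() and
NVMET_MAX_MPOOL_BVEC):

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#define MPOOL_BVEC 16	/* bvecs per preallocated pool object */

struct fake_bvec {
	void *page;
	unsigned int len;
	unsigned int offset;
};

/* single reserve object, standing in for the per-namespace mempool */
static struct fake_bvec reserve[MPOOL_BVEC];
static bool reserve_busy;

struct bvec_alloc {
	struct fake_bvec *bvec;
	bool mpool_alloc;	/* true: bvec points into the reserve pool */
	bool is_sync;		/* true: issue in MPOOL_BVEC-sized sync chunks */
};

static struct bvec_alloc alloc_bvecs(size_t nr_bvec)
{
	struct bvec_alloc a = {
		.bvec = malloc(nr_bvec * sizeof(struct fake_bvec)),
	};

	if (!a.bvec && !reserve_busy) {
		/*
		 * Normal allocation failed under memory pressure; fall back
		 * to the reserve.  (The real mempool_alloc() would instead
		 * sleep until an element is returned to the pool.)
		 */
		a.bvec = reserve;
		a.mpool_alloc = true;
		reserve_busy = true;
		/*
		 * The reserve object only holds MPOOL_BVEC entries, so a
		 * larger request is split and submitted synchronously chunk
		 * by chunk, reusing the same object: slower, but it always
		 * makes forward progress.
		 */
		if (nr_bvec > MPOOL_BVEC)
			a.is_sync = true;
	}
	return a;
}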

The old block device based implementation is moved to io-cmd-bdev.c and
now uses a "nvmet_bdev_" symbol prefix.  The enable/disable calls are
also moved into the respective files.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
[hch: updated changelog, fixed double req->ns lookup in bdev case]
Signed-off-by: Christoph Hellwig <hch@lst.de>
drivers/nvme/target/Makefile
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/io-cmd-bdev.c [new file with mode: 0644]
drivers/nvme/target/io-cmd-file.c [new file with mode: 0644]
drivers/nvme/target/io-cmd.c [deleted file]
drivers/nvme/target/nvmet.h

index 488250189c992d1d7e8e7442b746010934606726..8118c93391c6f899d656802fc05c6163d0c7ef29 100644 (file)
@@ -6,8 +6,8 @@ obj-$(CONFIG_NVME_TARGET_RDMA)          += nvmet-rdma.o
 obj-$(CONFIG_NVME_TARGET_FC)           += nvmet-fc.o
 obj-$(CONFIG_NVME_TARGET_FCLOOP)       += nvme-fcloop.o
 
-nvmet-y                += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
-                       discovery.o
+nvmet-y                += core.o configfs.o admin-cmd.o fabrics-cmd.o \
+                       discovery.o io-cmd-file.o io-cmd-bdev.o
 nvme-loop-y    += loop.o
 nvmet-rdma-y   += rdma.o
 nvmet-fc-y     += fc.o
index d1afcfd89aa3a1f56521f234b660739776c08de8..b2ba95b2eef7f526fe880413ce6984bba32e1bed 100644 (file)
@@ -45,6 +45,10 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
                return NVME_SC_INVALID_NS;
        }
 
+       /* we don't have the right data for file backed ns */
+       if (!ns->bdev)
+               goto out;
+
        host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
        data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
        host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
@@ -54,6 +58,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
        put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
        put_unaligned_le64(host_writes, &slog->host_writes[0]);
        put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+out:
        nvmet_put_namespace(ns);
 
        return NVME_SC_SUCCESS;
@@ -71,6 +76,9 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
 
        rcu_read_lock();
        list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+               /* we don't have the right data for file backed ns */
+               if (!ns->bdev)
+                       continue;
                host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
                data_units_read +=
                        part_stat_read(ns->bdev->bd_part, sectors[READ]);
index 6d8eaf3f89c57e09403e03920c667ab51f3231b7..800aaf96ddcd42f3094847bfb42207715567e35d 100644 (file)
@@ -271,6 +271,12 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
        percpu_ref_put(&ns->ref);
 }
 
+static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
+{
+       nvmet_bdev_ns_disable(ns);
+       nvmet_file_ns_disable(ns);
+}
+
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
        struct nvmet_subsys *subsys = ns->subsys;
@@ -281,23 +287,16 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
        if (ns->enabled)
                goto out_unlock;
 
-       ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
-                       NULL);
-       if (IS_ERR(ns->bdev)) {
-               pr_err("failed to open block device %s: (%ld)\n",
-                      ns->device_path, PTR_ERR(ns->bdev));
-               ret = PTR_ERR(ns->bdev);
-               ns->bdev = NULL;
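+       /* try to open the path as a block device first, else fall back to file backed */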
+       ret = nvmet_bdev_ns_enable(ns);
+       if (ret)
+               ret = nvmet_file_ns_enable(ns);
+       if (ret)
                goto out_unlock;
-       }
-
-       ns->size = i_size_read(ns->bdev->bd_inode);
-       ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
 
        ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
                                0, GFP_KERNEL);
        if (ret)
-               goto out_blkdev_put;
+               goto out_dev_put;
 
        if (ns->nsid > subsys->max_nsid)
                subsys->max_nsid = ns->nsid;
@@ -328,9 +327,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 out_unlock:
        mutex_unlock(&subsys->lock);
        return ret;
-out_blkdev_put:
-       blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
-       ns->bdev = NULL;
+out_dev_put:
+       nvmet_ns_dev_disable(ns);
        goto out_unlock;
 }
 
@@ -366,8 +364,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 
-       if (ns->bdev)
-               blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+       nvmet_ns_dev_disable(ns);
 out_unlock:
        mutex_unlock(&subsys->lock);
 }
@@ -499,6 +496,25 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
+static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+       struct nvme_command *cmd = req->cmd;
+       u16 ret;
+
+       ret = nvmet_check_ctrl_status(req, cmd);
+       if (unlikely(ret))
+               return ret;
+
+       req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+       if (unlikely(!req->ns))
+               return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
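+       /* ns->file is only set when the namespace was enabled file backed */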
+       if (req->ns->file)
+               return nvmet_file_parse_io_cmd(req);
+       else
+               return nvmet_bdev_parse_io_cmd(req);
+}
+
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
                struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
 {
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
new file mode 100644 (file)
index 0000000..e0b0f7d
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * NVMe I/O command implementation.
+ * Copyright (c) 2015-2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
+{
+       int ret;
+
+       ns->bdev = blkdev_get_by_path(ns->device_path,
+                       FMODE_READ | FMODE_WRITE, NULL);
+       if (IS_ERR(ns->bdev)) {
+               ret = PTR_ERR(ns->bdev);
+               if (ret != -ENOTBLK) {
+                       pr_err("failed to open block device %s: (%ld)\n",
+                                       ns->device_path, PTR_ERR(ns->bdev));
+               }
+               ns->bdev = NULL;
+               return ret;
+       }
+       ns->size = i_size_read(ns->bdev->bd_inode);
+       ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+       return 0;
+}
+
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+       if (ns->bdev) {
+               blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+               ns->bdev = NULL;
+       }
+}
+
+static void nvmet_bio_done(struct bio *bio)
+{
+       struct nvmet_req *req = bio->bi_private;
+
+       nvmet_req_complete(req,
+               bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+
+       if (bio != &req->b.inline_bio)
+               bio_put(bio);
+}
+
+static void nvmet_bdev_execute_rw(struct nvmet_req *req)
+{
+       int sg_cnt = req->sg_cnt;
+       struct bio *bio = &req->b.inline_bio;
+       struct scatterlist *sg;
+       sector_t sector;
+       blk_qc_t cookie;
+       int op, op_flags = 0, i;
+
+       if (!req->sg_cnt) {
+               nvmet_req_complete(req, 0);
+               return;
+       }
+
+       if (req->cmd->rw.opcode == nvme_cmd_write) {
+               op = REQ_OP_WRITE;
+               op_flags = REQ_SYNC | REQ_IDLE;
+               if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+                       op_flags |= REQ_FUA;
+       } else {
+               op = REQ_OP_READ;
+       }
+
+       sector = le64_to_cpu(req->cmd->rw.slba);
+       sector <<= (req->ns->blksize_shift - 9);
+
+       bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+       bio_set_dev(bio, req->ns->bdev);
+       bio->bi_iter.bi_sector = sector;
+       bio->bi_private = req;
+       bio->bi_end_io = nvmet_bio_done;
+       bio_set_op_attrs(bio, op, op_flags);
+
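+       /* map the scatterlist into the bio, chaining a new bio whenever one fills up */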
+       for_each_sg(req->sg, sg, req->sg_cnt, i) {
+               while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+                               != sg->length) {
+                       struct bio *prev = bio;
+
+                       bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+                       bio_set_dev(bio, req->ns->bdev);
+                       bio->bi_iter.bi_sector = sector;
+                       bio_set_op_attrs(bio, op, op_flags);
+
+                       bio_chain(bio, prev);
+                       submit_bio(prev);
+               }
+
+               sector += sg->length >> 9;
+               sg_cnt--;
+       }
+
+       cookie = submit_bio(bio);
+
+       blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+}
+
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+{
+       struct bio *bio = &req->b.inline_bio;
+
+       bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+       bio_set_dev(bio, req->ns->bdev);
+       bio->bi_private = req;
+       bio->bi_end_io = nvmet_bio_done;
+       bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+       submit_bio(bio);
+}
+
+static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
+               struct nvme_dsm_range *range, struct bio **bio)
+{
+       int ret;
+
+       ret = __blkdev_issue_discard(ns->bdev,
+                       le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
+                       le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
+                       GFP_KERNEL, 0, bio);
+       if (ret && ret != -EOPNOTSUPP)
+               return NVME_SC_INTERNAL | NVME_SC_DNR;
+       return 0;
+}
+
+static void nvmet_bdev_execute_discard(struct nvmet_req *req)
+{
+       struct nvme_dsm_range range;
+       struct bio *bio = NULL;
+       int i;
+       u16 status;
+
+       for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+               status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+                               sizeof(range));
+               if (status)
+                       break;
+
+               status = nvmet_bdev_discard_range(req->ns, &range, &bio);
+               if (status)
+                       break;
+       }
+
+       if (bio) {
+               bio->bi_private = req;
+               bio->bi_end_io = nvmet_bio_done;
+               if (status) {
+                       bio->bi_status = BLK_STS_IOERR;
+                       bio_endio(bio);
+               } else {
+                       submit_bio(bio);
+               }
+       } else {
+               nvmet_req_complete(req, status);
+       }
+}
+
+static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
+{
+       switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+       case NVME_DSMGMT_AD:
+               nvmet_bdev_execute_discard(req);
+               return;
+       case NVME_DSMGMT_IDR:
+       case NVME_DSMGMT_IDW:
+       default:
+               /* Not supported yet */
+               nvmet_req_complete(req, 0);
+               return;
+       }
+}
+
+static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
+{
+       struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+       struct bio *bio = NULL;
+       u16 status = NVME_SC_SUCCESS;
+       sector_t sector;
+       sector_t nr_sector;
+
+       sector = le64_to_cpu(write_zeroes->slba) <<
+               (req->ns->blksize_shift - 9);
+       nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+               (req->ns->blksize_shift - 9));
+
+       if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
+                               GFP_KERNEL, &bio, 0))
+               status = NVME_SC_INTERNAL | NVME_SC_DNR;
+
+       if (bio) {
+               bio->bi_private = req;
+               bio->bi_end_io = nvmet_bio_done;
+               submit_bio(bio);
+       } else {
+               nvmet_req_complete(req, status);
+       }
+}
+
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
+{
+       struct nvme_command *cmd = req->cmd;
+
+       switch (cmd->common.opcode) {
+       case nvme_cmd_read:
+       case nvme_cmd_write:
+               req->execute = nvmet_bdev_execute_rw;
+               req->data_len = nvmet_rw_len(req);
+               return 0;
+       case nvme_cmd_flush:
+               req->execute = nvmet_bdev_execute_flush;
+               req->data_len = 0;
+               return 0;
+       case nvme_cmd_dsm:
+               req->execute = nvmet_bdev_execute_dsm;
+               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+                       sizeof(struct nvme_dsm_range);
+               return 0;
+       case nvme_cmd_write_zeroes:
+               req->execute = nvmet_bdev_execute_write_zeroes;
+               return 0;
+       default:
+               pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+                      req->sq->qid);
+               return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+       }
+}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
new file mode 100644 (file)
index 0000000..ca1ccf8
--- /dev/null
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target File I/O commands implementation.
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/file.h>
+#include "nvmet.h"
+
+#define NVMET_MAX_MPOOL_BVEC           16
+#define NVMET_MIN_MPOOL_OBJ            16
+
+void nvmet_file_ns_disable(struct nvmet_ns *ns)
+{
+       if (ns->file) {
+               mempool_destroy(ns->bvec_pool);
+               ns->bvec_pool = NULL;
+               kmem_cache_destroy(ns->bvec_cache);
+               ns->bvec_cache = NULL;
+               fput(ns->file);
+               ns->file = NULL;
+       }
+}
+
+int nvmet_file_ns_enable(struct nvmet_ns *ns)
+{
+       int ret;
+       struct kstat stat;
+
+       ns->file = filp_open(ns->device_path,
+                       O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+       if (IS_ERR(ns->file)) {
+               pr_err("failed to open file %s: (%ld)\n",
+                               ns->device_path, PTR_ERR(ns->file));
+               return PTR_ERR(ns->file);
+       }
+
+       ret = vfs_getattr(&ns->file->f_path,
+                       &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+       if (ret)
+               goto err;
+
+       ns->size = stat.size;
+       ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+
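+       /*
+        * Reserve a mempool of NVMET_MIN_MPOOL_OBJ objects, each large
+        * enough for NVMET_MAX_MPOOL_BVEC bio_vecs, as the slow-path
+        * allocation of last resort under memory reclaim.
+        */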
+       ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+                       NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ns->bvec_cache) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+                       mempool_free_slab, ns->bvec_cache);
+       if (!ns->bvec_pool) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       return ret;
+err:
+       ns->size = 0;
+       ns->blksize_shift = 0;
+       nvmet_file_ns_disable(ns);
+       return ret;
+}
+
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+{
+       bv->bv_page = sg_page_iter_page(iter);
+       bv->bv_offset = iter->sg->offset;
+       bv->bv_len = PAGE_SIZE - iter->sg->offset;
+}
+
+static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
+               unsigned long nr_segs, size_t count)
+{
+       struct kiocb *iocb = &req->f.iocb;
+       ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
+       struct iov_iter iter;
+       int ki_flags = 0, rw;
+       ssize_t ret;
+
+       if (req->cmd->rw.opcode == nvme_cmd_write) {
+               if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+                       ki_flags = IOCB_DSYNC;
+               call_iter = req->ns->file->f_op->write_iter;
+               rw = WRITE;
+       } else {
+               call_iter = req->ns->file->f_op->read_iter;
+               rw = READ;
+       }
+
+       iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+
+       iocb->ki_pos = pos;
+       iocb->ki_filp = req->ns->file;
+       iocb->ki_flags = IOCB_DIRECT | ki_flags;
+
+       ret = call_iter(iocb, &iter);
+
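+       /*
+        * A synchronous return (anything but -EIOCBQUEUED) never invokes
+        * ->ki_complete, so call it by hand for the async path; the sync
+        * slow path leaves ki_complete NULL and completes the request
+        * itself later.
+        */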
+       if (ret != -EIOCBQUEUED && iocb->ki_complete)
+               iocb->ki_complete(iocb, ret, 0);
+
+       return ret;
+}
+
+static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+{
+       struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+
+       if (req->f.bvec != req->inline_bvec) {
+               if (likely(req->f.mpool_alloc == false))
+                       kfree(req->f.bvec);
+               else
+                       mempool_free(req->f.bvec, req->ns->bvec_pool);
+       }
+
+       nvmet_req_complete(req, ret != req->data_len ?
+                       NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+       ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+       struct sg_page_iter sg_pg_iter;
+       unsigned long bv_cnt = 0;
+       bool is_sync = false;
+       size_t len = 0, total_len = 0;
+       ssize_t ret = 0;
+       loff_t pos;
+
+       if (!req->sg_cnt || !nr_bvec) {
+               nvmet_req_complete(req, 0);
+               return;
+       }
+
+       if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+               req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+                               GFP_KERNEL);
+       else
+               req->f.bvec = req->inline_bvec;
+
+       req->f.mpool_alloc = false;
+       if (unlikely(!req->f.bvec)) {
+               /* fallback under memory pressure */
+               req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+               req->f.mpool_alloc = true;
+               if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
+                       is_sync = true;
+       }
+
+       pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+
+       memset(&req->f.iocb, 0, sizeof(struct kiocb));
+       for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
+               nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+               len += req->f.bvec[bv_cnt].bv_len;
+               total_len += req->f.bvec[bv_cnt].bv_len;
+               bv_cnt++;
+
+               WARN_ON_ONCE((nr_bvec - 1) < 0);
+
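+               /*
+                * Slow path: the mempool object holds at most
+                * NVMET_MAX_MPOOL_BVEC bio_vecs, so submit a sync I/O
+                * each time the batch fills up or the last segment of
+                * the request has been mapped.
+                */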
+               if (unlikely(is_sync) &&
+                   (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+                       ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+                       if (ret < 0)
+                               goto out;
+                       pos += len;
+                       bv_cnt = 0;
+                       len = 0;
+               }
+               nr_bvec--;
+       }
+
+       if (WARN_ON_ONCE(total_len != req->data_len))
+               ret = -EIO;
+out:
+       if (unlikely(is_sync || ret)) {
+               nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
+               return;
+       }
+       req->f.iocb.ki_complete = nvmet_file_io_done;
+       nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+}
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+       struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+       int ret;
+
+       ret = vfs_fsync(req->ns->file, 1);
+
+       nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_flush(struct nvmet_req *req)
+{
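+       /* vfs_fsync() may block, so run the flush from workqueue context */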
+       INIT_WORK(&req->f.work, nvmet_file_flush_work);
+       schedule_work(&req->f.work);
+}
+
+static void nvmet_file_execute_discard(struct nvmet_req *req)
+{
+       int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+       struct nvme_dsm_range range;
+       loff_t offset;
+       loff_t len;
+       int i, ret = 0;
+
+       for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+               if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+                                       sizeof(range)))
+                       break;
+               offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+               len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+               ret = vfs_fallocate(req->ns->file, mode, offset, len);
+               if (ret)
+                       break;
+       }
+
+       nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_dsm_work(struct work_struct *w)
+{
+       struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+       switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+       case NVME_DSMGMT_AD:
+               nvmet_file_execute_discard(req);
+               return;
+       case NVME_DSMGMT_IDR:
+       case NVME_DSMGMT_IDW:
+       default:
+               /* Not supported yet */
+               nvmet_req_complete(req, 0);
+               return;
+       }
+}
+
+static void nvmet_file_execute_dsm(struct nvmet_req *req)
+{
+       INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+       schedule_work(&req->f.work);
+}
+
+static void nvmet_file_write_zeroes_work(struct work_struct *w)
+{
+       struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+       struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+       int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+       loff_t offset;
+       loff_t len;
+       int ret;
+
+       offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
+       len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+                       req->ns->blksize_shift);
+
+       ret = vfs_fallocate(req->ns->file, mode, offset, len);
+       nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+{
+       INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+       schedule_work(&req->f.work);
+}
+
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+{
+       struct nvme_command *cmd = req->cmd;
+
+       switch (cmd->common.opcode) {
+       case nvme_cmd_read:
+       case nvme_cmd_write:
+               req->execute = nvmet_file_execute_rw;
+               req->data_len = nvmet_rw_len(req);
+               return 0;
+       case nvme_cmd_flush:
+               req->execute = nvmet_file_execute_flush;
+               req->data_len = 0;
+               return 0;
+       case nvme_cmd_dsm:
+               req->execute = nvmet_file_execute_dsm;
+               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+                       sizeof(struct nvme_dsm_range);
+               return 0;
+       case nvme_cmd_write_zeroes:
+               req->execute = nvmet_file_execute_write_zeroes;
+               req->data_len = 0;
+               return 0;
+       default:
+               pr_err("unhandled cmd for file ns %d on qid %d\n",
+                               cmd->common.opcode, req->sq->qid);
+               return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+       }
+}
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
deleted file mode 100644 (file)
index e5eb2db..0000000
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * NVMe I/O command implementation.
- * Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/blkdev.h>
-#include <linux/module.h>
-#include "nvmet.h"
-
-static void nvmet_bio_done(struct bio *bio)
-{
-       struct nvmet_req *req = bio->bi_private;
-
-       nvmet_req_complete(req,
-               bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
-
-       if (bio != &req->inline_bio)
-               bio_put(bio);
-}
-
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
-{
-       return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
-                       req->ns->blksize_shift;
-}
-
-static void nvmet_execute_rw(struct nvmet_req *req)
-{
-       int sg_cnt = req->sg_cnt;
-       struct bio *bio = &req->inline_bio;
-       struct scatterlist *sg;
-       sector_t sector;
-       blk_qc_t cookie;
-       int op, op_flags = 0, i;
-
-       if (!req->sg_cnt) {
-               nvmet_req_complete(req, 0);
-               return;
-       }
-
-       if (req->cmd->rw.opcode == nvme_cmd_write) {
-               op = REQ_OP_WRITE;
-               op_flags = REQ_SYNC | REQ_IDLE;
-               if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
-                       op_flags |= REQ_FUA;
-       } else {
-               op = REQ_OP_READ;
-       }
-
-       sector = le64_to_cpu(req->cmd->rw.slba);
-       sector <<= (req->ns->blksize_shift - 9);
-
-       bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-       bio_set_dev(bio, req->ns->bdev);
-       bio->bi_iter.bi_sector = sector;
-       bio->bi_private = req;
-       bio->bi_end_io = nvmet_bio_done;
-       bio_set_op_attrs(bio, op, op_flags);
-
-       for_each_sg(req->sg, sg, req->sg_cnt, i) {
-               while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
-                               != sg->length) {
-                       struct bio *prev = bio;
-
-                       bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
-                       bio_set_dev(bio, req->ns->bdev);
-                       bio->bi_iter.bi_sector = sector;
-                       bio_set_op_attrs(bio, op, op_flags);
-
-                       bio_chain(bio, prev);
-                       submit_bio(prev);
-               }
-
-               sector += sg->length >> 9;
-               sg_cnt--;
-       }
-
-       cookie = submit_bio(bio);
-
-       blk_poll(bdev_get_queue(req->ns->bdev), cookie);
-}
-
-static void nvmet_execute_flush(struct nvmet_req *req)
-{
-       struct bio *bio = &req->inline_bio;
-
-       bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-       bio_set_dev(bio, req->ns->bdev);
-       bio->bi_private = req;
-       bio->bi_end_io = nvmet_bio_done;
-       bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-       submit_bio(bio);
-}
-
-static u16 nvmet_discard_range(struct nvmet_ns *ns,
-               struct nvme_dsm_range *range, struct bio **bio)
-{
-       int ret;
-
-       ret = __blkdev_issue_discard(ns->bdev,
-                       le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
-                       le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-                       GFP_KERNEL, 0, bio);
-       if (ret && ret != -EOPNOTSUPP)
-               return NVME_SC_INTERNAL | NVME_SC_DNR;
-       return 0;
-}
-
-static void nvmet_execute_discard(struct nvmet_req *req)
-{
-       struct nvme_dsm_range range;
-       struct bio *bio = NULL;
-       int i;
-       u16 status;
-
-       for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
-               status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
-                               sizeof(range));
-               if (status)
-                       break;
-
-               status = nvmet_discard_range(req->ns, &range, &bio);
-               if (status)
-                       break;
-       }
-
-       if (bio) {
-               bio->bi_private = req;
-               bio->bi_end_io = nvmet_bio_done;
-               if (status) {
-                       bio->bi_status = BLK_STS_IOERR;
-                       bio_endio(bio);
-               } else {
-                       submit_bio(bio);
-               }
-       } else {
-               nvmet_req_complete(req, status);
-       }
-}
-
-static void nvmet_execute_dsm(struct nvmet_req *req)
-{
-       switch (le32_to_cpu(req->cmd->dsm.attributes)) {
-       case NVME_DSMGMT_AD:
-               nvmet_execute_discard(req);
-               return;
-       case NVME_DSMGMT_IDR:
-       case NVME_DSMGMT_IDW:
-       default:
-               /* Not supported yet */
-               nvmet_req_complete(req, 0);
-               return;
-       }
-}
-
-static void nvmet_execute_write_zeroes(struct nvmet_req *req)
-{
-       struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
-       struct bio *bio = NULL;
-       u16 status = NVME_SC_SUCCESS;
-       sector_t sector;
-       sector_t nr_sector;
-
-       sector = le64_to_cpu(write_zeroes->slba) <<
-               (req->ns->blksize_shift - 9);
-       nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
-               (req->ns->blksize_shift - 9));
-
-       if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
-                               GFP_KERNEL, &bio, 0))
-               status = NVME_SC_INTERNAL | NVME_SC_DNR;
-
-       if (bio) {
-               bio->bi_private = req;
-               bio->bi_end_io = nvmet_bio_done;
-               submit_bio(bio);
-       } else {
-               nvmet_req_complete(req, status);
-       }
-}
-
-u16 nvmet_parse_io_cmd(struct nvmet_req *req)
-{
-       struct nvme_command *cmd = req->cmd;
-       u16 ret;
-
-       ret = nvmet_check_ctrl_status(req, cmd);
-       if (unlikely(ret))
-               return ret;
-
-       req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
-       if (unlikely(!req->ns))
-               return NVME_SC_INVALID_NS | NVME_SC_DNR;
-
-       switch (cmd->common.opcode) {
-       case nvme_cmd_read:
-       case nvme_cmd_write:
-               req->execute = nvmet_execute_rw;
-               req->data_len = nvmet_rw_len(req);
-               return 0;
-       case nvme_cmd_flush:
-               req->execute = nvmet_execute_flush;
-               req->data_len = 0;
-               return 0;
-       case nvme_cmd_dsm:
-               req->execute = nvmet_execute_dsm;
-               req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
-                       sizeof(struct nvme_dsm_range);
-               return 0;
-       case nvme_cmd_write_zeroes:
-               req->execute = nvmet_execute_write_zeroes;
-               return 0;
-       default:
-               pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-                      req->sq->qid);
-               return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-       }
-}
index 15fd84ab21f80629df2a1d3e6150032e389c931e..2d09afcfe505529eff0d8b066d11a978c9a9d45a 100644 (file)
@@ -43,6 +43,7 @@ struct nvmet_ns {
        struct list_head        dev_link;
        struct percpu_ref       ref;
        struct block_device     *bdev;
+       struct file             *file;
        u32                     nsid;
        u32                     blksize_shift;
        loff_t                  size;
@@ -57,6 +58,8 @@ struct nvmet_ns {
        struct config_group     group;
 
        struct completion       disable_done;
+       mempool_t               *bvec_pool;
+       struct kmem_cache       *bvec_cache;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -222,8 +225,18 @@ struct nvmet_req {
        struct nvmet_cq         *cq;
        struct nvmet_ns         *ns;
        struct scatterlist      *sg;
-       struct bio              inline_bio;
        struct bio_vec          inline_bvec[NVMET_MAX_INLINE_BIOVEC];
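+       /* backend private per-request state: b for bdev, f for file backed ns */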
+       union {
+               struct {
+                       struct bio      inline_bio;
+               } b;
+               struct {
+                       bool                    mpool_alloc;
+                       struct kiocb            iocb;
+                       struct bio_vec          *bvec;
+                       struct work_struct      work;
+               } f;
+       };
        int                     sg_cnt;
        /* data length as parsed from the command: */
        size_t                  data_len;
@@ -263,7 +276,8 @@ struct nvmet_async_event {
 };
 
 u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
-u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -338,4 +352,14 @@ extern struct rw_semaphore nvmet_config_sem;
 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
                const char *hostnqn);
 
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
+int nvmet_file_ns_enable(struct nvmet_ns *ns);
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
+void nvmet_file_ns_disable(struct nvmet_ns *ns);
+
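+/*
+ * rw.length is 0-based; e.g. length == 7 on a 4K namespace
+ * (blksize_shift == 12) yields (7 + 1) << 12 == 32K of data.
+ */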
+static inline u32 nvmet_rw_len(struct nvmet_req *req)
+{
+       return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
+                       req->ns->blksize_shift;
+}
 #endif /* _NVMET_H */