Merge tag 'for-linus-20190329' of git://git.kernel.dk/linux-block
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Mar 2019 21:43:07 +0000 (14:43 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 29 Mar 2019 21:43:07 +0000 (14:43 -0700)
Pull block fixes from Jens Axboe:
 "Small set of fixes that should go into this series. This contains:

   - compat signal mask fix for io_uring (Arnd)

   - EAGAIN corner case for direct vs buffered writes for io_uring
     (Roman)

   - NVMe pull request from Christoph with various little fixes

   - sbitmap ws_active fix, which caused a perf regression for shared
     tags (me)

   - sbitmap bit ordering fix (Ming)

   - libata on-stack DMA fix (Raymond)"

* tag 'for-linus-20190329' of git://git.kernel.dk/linux-block:
  nvmet: fix error flow during ns enable
  nvmet: fix building bvec from sg list
  nvme-multipath: relax ANA state check
  nvme-tcp: fix an endianess miss-annotation
  libata: fix using DMA buffers on stack
  io_uring: offload write to async worker in case of -EAGAIN
  sbitmap: order READ/WRITE freed instance and setting clear bit
  blk-mq: fix sbitmap ws_active for shared tags
  io_uring: fix big-endian compat signal mask handling
  blk-mq: update comment for blk_mq_hctx_has_pending()
  blk-mq: use blk_mq_put_driver_tag() to put tag

block/blk-flush.c
block/blk-mq.c
block/blk-mq.h
drivers/ata/libata-zpodd.c
drivers/nvme/host/multipath.c
drivers/nvme/host/tcp.c
drivers/nvme/target/core.c
drivers/nvme/target/io-cmd-file.c
fs/io_uring.c
lib/sbitmap.c

index 6e0f2d97fc6d8f0a5b14e6dbea23f817706bef7a..d95f9489201526081abf8b8bdcc65a525cf4c179 100644 (file)
@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
                blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
                flush_rq->tag = -1;
        } else {
-               blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+               blk_mq_put_driver_tag(flush_rq);
                flush_rq->internal_tag = -1;
        }
 
@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 
        if (q->elevator) {
                WARN_ON(rq->tag < 0);
-               blk_mq_put_driver_tag_hctx(hctx, rq);
+               blk_mq_put_driver_tag(rq);
        }
 
        /*
index 70b210a308c452b43abd1a270e759f5e331ab55f..3ff3d7b4996973458fa44a89133ed4ec5b65b2d4 100644 (file)
@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
 }
 
 /*
- * Check if any of the ctx's have pending work in this hardware queue
+ * Check if any of the ctx, dispatch list or elevator
+ * have pending work in this hardware queue.
  */
 static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
 {
@@ -1071,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
        hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
        spin_lock(&hctx->dispatch_wait_lock);
-       list_del_init(&wait->entry);
+       if (!list_empty(&wait->entry)) {
+               struct sbitmap_queue *sbq;
+
+               list_del_init(&wait->entry);
+               sbq = &hctx->tags->bitmap_tags;
+               atomic_dec(&sbq->ws_active);
+       }
        spin_unlock(&hctx->dispatch_wait_lock);
 
        blk_mq_run_hw_queue(hctx, true);
@@ -1087,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
 static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
                                 struct request *rq)
 {
+       struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
        struct wait_queue_head *wq;
        wait_queue_entry_t *wait;
        bool ret;
@@ -1109,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
        if (!list_empty_careful(&wait->entry))
                return false;
 
-       wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+       wq = &bt_wait_ptr(sbq, hctx)->wait;
 
        spin_lock_irq(&wq->lock);
        spin_lock(&hctx->dispatch_wait_lock);
@@ -1119,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
                return false;
        }
 
+       atomic_inc(&sbq->ws_active);
        wait->flags &= ~WQ_FLAG_EXCLUSIVE;
        __add_wait_queue(wq, wait);
 
@@ -1139,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
         * someone else gets the wakeup.
         */
        list_del_init(&wait->entry);
+       atomic_dec(&sbq->ws_active);
        spin_unlock(&hctx->dispatch_wait_lock);
        spin_unlock_irq(&wq->lock);
 
index 0ed8e5a8729fccd39d5da0e58854bdb5c5de42da..d704fc7766f45458fd7f186a0111c859e09baafc 100644 (file)
@@ -224,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
        }
 }
 
-static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
-                                      struct request *rq)
-{
-       if (rq->tag == -1 || rq->internal_tag == -1)
-               return;
-
-       __blk_mq_put_driver_tag(hctx, rq);
-}
-
 static inline void blk_mq_put_driver_tag(struct request *rq)
 {
        if (rq->tag == -1 || rq->internal_tag == -1)
index b3ed8f9953a862ea3ae67ef065ca5469330a44e0..173e6f2dd9af0f12afdc1fee7e372cfa4291e0aa 100644 (file)
@@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev)
 /* Per the spec, only slot type and drawer type ODD can be supported */
 static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
 {
-       char buf[16];
+       char *buf;
        unsigned int ret;
-       struct rm_feature_desc *desc = (void *)(buf + 8);
+       struct rm_feature_desc *desc;
        struct ata_taskfile tf;
        static const char cdb[] = {  GPCMD_GET_CONFIGURATION,
                        2,      /* only 1 feature descriptor requested */
                        0, 3,   /* 3, removable medium feature */
                        0, 0, 0,/* reserved */
-                       0, sizeof(buf),
+                       0, 16,
                        0, 0, 0,
        };
 
+       buf = kzalloc(16, GFP_KERNEL);
+       if (!buf)
+               return ODD_MECH_TYPE_UNSUPPORTED;
+       desc = (void *)(buf + 8);
+
        ata_tf_init(dev, &tf);
        tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
        tf.command = ATA_CMD_PACKET;
        tf.protocol = ATAPI_PROT_PIO;
-       tf.lbam = sizeof(buf);
+       tf.lbam = 16;
 
        ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
-                               buf, sizeof(buf), 0);
-       if (ret)
+                               buf, 16, 0);
+       if (ret) {
+               kfree(buf);
                return ODD_MECH_TYPE_UNSUPPORTED;
+       }
 
-       if (be16_to_cpu(desc->feature_code) != 3)
+       if (be16_to_cpu(desc->feature_code) != 3) {
+               kfree(buf);
                return ODD_MECH_TYPE_UNSUPPORTED;
+       }
 
-       if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1)
+       if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) {
+               kfree(buf);
                return ODD_MECH_TYPE_SLOT;
-       else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1)
+       } else if (desc->mech_type == 1 && desc->load == 0 &&
+                  desc->eject == 1) {
+               kfree(buf);
                return ODD_MECH_TYPE_DRAWER;
-       else
+       } else {
+               kfree(buf);
                return ODD_MECH_TYPE_UNSUPPORTED;
+       }
 }
 
 /* Test if ODD is zero power ready by sense code */
index 2839bb70badfbcb8284bc5bbbc1f457bd3b58c63..f0716f6ce41fa2a1ad993e45adba9148d7f0c120 100644 (file)
@@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
 static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
                struct nvme_ns *ns)
 {
-       enum nvme_ana_state old;
-
        mutex_lock(&ns->head->lock);
-       old = ns->ana_state;
        ns->ana_grpid = le32_to_cpu(desc->grpid);
        ns->ana_state = desc->state;
        clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
 
-       if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
+       if (nvme_state_is_live(ns->ana_state))
                nvme_mpath_set_live(ns);
        mutex_unlock(&ns->head->lock);
 }
index e7e08889865e732d503a6ac2af5d38cac4dd9672..68c49dd672104d82ea768a6e9bf4354df731422b 100644 (file)
@@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
        return ret;
 }
 
-static inline void nvme_tcp_end_request(struct request *rq, __le16 status)
+static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 {
        union nvme_result res = {};
 
index 2d73b66e368627cdee268a74d30fb3c5d6a34235..b3e765a95af8ee7447c536ff48095504c8100d67 100644 (file)
@@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 
        ret = nvmet_p2pmem_ns_enable(ns);
        if (ret)
-               goto out_unlock;
+               goto out_dev_disable;
 
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                nvmet_p2pmem_ns_add_p2p(ctrl, ns);
@@ -550,7 +550,7 @@ out_unlock:
 out_dev_put:
        list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
                pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
-
+out_dev_disable:
        nvmet_ns_dev_disable(ns);
        goto out_unlock;
 }
index 3e43212d3c1c6bba5a6d553dc2a965188c5ccbf5..bc6ebb51b0bf7c5310940fca19450fd115ea7788 100644 (file)
@@ -75,11 +75,11 @@ err:
        return ret;
 }
 
-static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
 {
-       bv->bv_page = sg_page_iter_page(iter);
-       bv->bv_offset = iter->sg->offset;
-       bv->bv_len = PAGE_SIZE - iter->sg->offset;
+       bv->bv_page = sg_page(sg);
+       bv->bv_offset = sg->offset;
+       bv->bv_len = sg->length;
 }
 
 static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
@@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
 
 static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
 {
-       ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
-       struct sg_page_iter sg_pg_iter;
+       ssize_t nr_bvec = req->sg_cnt;
        unsigned long bv_cnt = 0;
        bool is_sync = false;
        size_t len = 0, total_len = 0;
        ssize_t ret = 0;
        loff_t pos;
-
+       int i;
+       struct scatterlist *sg;
 
        if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
                is_sync = true;
@@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
        }
 
        memset(&req->f.iocb, 0, sizeof(struct kiocb));
-       for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
-               nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+       for_each_sg(req->sg, sg, req->sg_cnt, i) {
+               nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
                len += req->f.bvec[bv_cnt].bv_len;
                total_len += req->f.bvec[bv_cnt].bv_len;
                bv_cnt++;
@@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
 
 static void nvmet_file_execute_rw(struct nvmet_req *req)
 {
-       ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+       ssize_t nr_bvec = req->sg_cnt;
 
        if (!req->sg_cnt || !nr_bvec) {
                nvmet_req_complete(req, 0);
index 6aaa30580a2b2057fca13404a44e0b3773450288..bbdbd56cf2ac9384b83e78945b2c6c0031cc346d 100644 (file)
@@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 
        ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
        if (!ret) {
+               ssize_t ret2;
+
                /*
                 * Open-code file_start_write here to grab freeze protection,
                 * which will be released by another thread in
@@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
                                                SB_FREEZE_WRITE);
                }
                kiocb->ki_flags |= IOCB_WRITE;
-               io_rw_done(kiocb, call_write_iter(file, kiocb, &iter));
+
+               ret2 = call_write_iter(file, kiocb, &iter);
+               if (!force_nonblock || ret2 != -EAGAIN) {
+                       io_rw_done(kiocb, ret2);
+               } else {
+                       /*
+                        * If ->needs_lock is true, we're already in async
+                        * context.
+                        */
+                       if (!s->needs_lock)
+                               io_async_list_note(WRITE, req, iov_count);
+                       ret = -EAGAIN;
+               }
        }
 out_free:
        kfree(iovec);
@@ -1968,7 +1982,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                return 0;
 
        if (sig) {
-               ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz);
+#ifdef CONFIG_COMPAT
+               if (in_compat_syscall())
+                       ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+                                                     &ksigmask, &sigsaved, sigsz);
+               else
+#endif
+                       ret = set_user_sigmask(sig, &ksigmask,
+                                              &sigsaved, sigsz);
+
                if (ret)
                        return ret;
        }
index 5b382c1244ede33c14016142ac2d7fec4d0608da..155fe38756ecfda251f26fa8616a325dddd8d455 100644 (file)
@@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
                         unsigned int cpu)
 {
+       /*
+        * Once the clear bit is set, the bit may be allocated out.
+        *
+        * Orders READ/WRITE on the asssociated instance(such as request
+        * of blk_mq) by this bit for avoiding race with re-allocation,
+        * and its pair is the memory barrier implied in __sbitmap_get_word.
+        *
+        * One invariant is that the clear bit has to be zero when the bit
+        * is in use.
+        */
+       smp_mb__before_atomic();
        sbitmap_deferred_clear_bit(&sbq->sb, nr);
 
        /*