Merge branch 'nvme-5.3-rc' of git://git.infradead.org/nvme into for-linus
author     Jens Axboe <axboe@kernel.dk>
           Sun, 11 Aug 2019 03:41:41 +0000 (21:41 -0600)
committer  Jens Axboe <axboe@kernel.dk>
           Sun, 11 Aug 2019 03:41:41 +0000 (21:41 -0600)
Pull NVMe fixes from Sagi:

"Few nvme fixes for the next rc round.
- detect capacity changes on the mpath disk from Anthony
- probe/remove fix from Keith
- various fixes to pass blktests from Logan
- deadlock in reset/scan race fix
- nvme-rdma use-after-free fix
- deadlock fix when passthru commands race mpath disk info update"

* 'nvme-5.3-rc' of git://git.infradead.org/nvme:
  nvme-pci: Fix async probe remove race
  nvme: fix controller removal race with scan work
  nvme-rdma: fix possible use-after-free in connect error flow
  nvme: fix a possible deadlock when passthru commands sent to a multipath device
  nvme-core: Fix extra device_put() call on error path
  nvmet-file: fix nvmet_file_flush() always returning an error
  nvmet-loop: Flush nvme_delete_wq when removing the port
  nvmet: Fix use-after-free bug when a port is removed
  nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns

14 files changed:
MAINTAINERS
block/bfq-iosched.c
drivers/ata/libahci_platform.c
drivers/ata/libata-scsi.c
drivers/ata/libata-sff.c
drivers/ata/pata_rb532_cf.c
drivers/block/aoe/aoedev.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/md/bcache/sysfs.c
drivers/s390/block/dasd_alias.c
fs/block_dev.c
fs/io_uring.c
include/linux/fs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6426db5198f0537746c22d10f95ce4a5004fdde3..6c49b48cfd69722fb02c6a5f6886758122527685 100644
@@ -6322,7 +6322,8 @@ F:        Documentation/devicetree/bindings/counter/ftm-quaddec.txt
 F:     drivers/counter/ftm-quaddec.c
 
 FLOPPY DRIVER
-S:     Orphan
+M:     Denis Efremov <efremov@linux.com>
+S:     Odd Fixes
 L:     linux-block@vger.kernel.org
 F:     drivers/block/floppy.c
 
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 586fcfe227eae6ca777f26e45a7605644451643d..b33be928d164fef34f5d2f00863d9034e38ef1a2 100644
@@ -1924,12 +1924,13 @@ static void bfq_add_request(struct request *rq)
                 * confirmed no later than during the next
                 * I/O-plugging interval for bfqq.
                 */
-               if (!bfq_bfqq_has_short_ttime(bfqq) &&
+               if (bfqd->last_completed_rq_bfqq &&
+                   !bfq_bfqq_has_short_ttime(bfqq) &&
                    ktime_get_ns() - bfqd->last_completion <
                    200 * NSEC_PER_USEC) {
                        if (bfqd->last_completed_rq_bfqq != bfqq &&
-                                  bfqd->last_completed_rq_bfqq !=
-                                  bfqq->waker_bfqq) {
+                           bfqd->last_completed_rq_bfqq !=
+                           bfqq->waker_bfqq) {
                                /*
                                 * First synchronization detected with
                                 * a candidate waker queue, or with a
@@ -2250,9 +2251,14 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
            blk_rq_pos(container_of(rb_prev(&req->rb_node),
                                    struct request, rb_node))) {
                struct bfq_queue *bfqq = bfq_init_rq(req);
-               struct bfq_data *bfqd = bfqq->bfqd;
+               struct bfq_data *bfqd;
                struct request *prev, *next_rq;
 
+               if (!bfqq)
+                       return;
+
+               bfqd = bfqq->bfqd;
+
                /* Reposition request in its sort_list */
                elv_rb_del(&bfqq->sort_list, req);
                elv_rb_add(&bfqq->sort_list, req);
@@ -2299,6 +2305,9 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
        struct bfq_queue *bfqq = bfq_init_rq(rq),
                *next_bfqq = bfq_init_rq(next);
 
+       if (!bfqq)
+               return;
+
        /*
         * If next and rq belong to the same bfq_queue and next is older
         * than rq, then reposition rq in the fifo (by substituting next
@@ -4764,6 +4773,8 @@ static struct request *bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
  */
 void bfq_put_queue(struct bfq_queue *bfqq)
 {
+       struct bfq_queue *item;
+       struct hlist_node *n;
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
        struct bfq_group *bfqg = bfqq_group(bfqq);
 #endif
@@ -4808,6 +4819,36 @@ void bfq_put_queue(struct bfq_queue *bfqq)
                        bfqq->bfqd->burst_size--;
        }
 
+       /*
+        * bfqq does not exist any longer, so it cannot be woken by
+        * any other queue, and cannot wake any other queue. Then bfqq
+        * must be removed from the woken list of its possible waker
+        * queue, and all queues in the woken list of bfqq must stop
+        * having a waker queue. Strictly speaking, these updates
+        * should be performed when bfqq remains with no I/O source
+        * attached to it, which happens before bfqq gets freed. In
+        * particular, this happens when the last process associated
+        * with bfqq exits or gets associated with a different
+        * queue. However, both events lead to bfqq being freed soon,
+        * and dangling references would come out only after bfqq gets
+        * freed. So these updates are done here, as a simple and safe
+        * way to handle all cases.
+        */
+       /* remove bfqq from woken list */
+       if (!hlist_unhashed(&bfqq->woken_list_node))
+               hlist_del_init(&bfqq->woken_list_node);
+
+       /* reset waker for all queues in woken list */
+       hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
+                                 woken_list_node) {
+               item->waker_bfqq = NULL;
+               bfq_clear_bfqq_has_waker(item);
+               hlist_del_init(&item->woken_list_node);
+       }
+
+       if (bfqq->bfqd && bfqq->bfqd->last_completed_rq_bfqq == bfqq)
+               bfqq->bfqd->last_completed_rq_bfqq = NULL;
+
        kmem_cache_free(bfq_pool, bfqq);
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
        bfqg_and_blkg_put(bfqg);
@@ -4835,9 +4876,6 @@ static void bfq_put_cooperator(struct bfq_queue *bfqq)
 
 static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
-       struct bfq_queue *item;
-       struct hlist_node *n;
-
        if (bfqq == bfqd->in_service_queue) {
                __bfq_bfqq_expire(bfqd, bfqq, BFQQE_BUDGET_TIMEOUT);
                bfq_schedule_dispatch(bfqd);
@@ -4847,18 +4885,6 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
        bfq_put_cooperator(bfqq);
 
-       /* remove bfqq from woken list */
-       if (!hlist_unhashed(&bfqq->woken_list_node))
-               hlist_del_init(&bfqq->woken_list_node);
-
-       /* reset waker for all queues in woken list */
-       hlist_for_each_entry_safe(item, n, &bfqq->woken_list,
-                                 woken_list_node) {
-               item->waker_bfqq = NULL;
-               bfq_clear_bfqq_has_waker(item);
-               hlist_del_init(&item->woken_list_node);
-       }
-
        bfq_put_queue(bfqq); /* release process reference */
 }
 
@@ -5436,12 +5462,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
        spin_lock_irq(&bfqd->lock);
        bfqq = bfq_init_rq(rq);
-       if (at_head || blk_rq_is_passthrough(rq)) {
+       if (!bfqq || at_head || blk_rq_is_passthrough(rq)) {
                if (at_head)
                        list_add(&rq->queuelist, &bfqd->dispatch);
                else
                        list_add_tail(&rq->queuelist, &bfqd->dispatch);
-       } else { /* bfqq is assumed to be non null here */
+       } else {
                idle_timer_disabled = __bfq_insert_request(bfqd, rq);
                /*
                 * Update bfqq, because, if a queue merge has occurred
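The bfq hunks above do two things: every caller of bfq_init_rq() now tolerates a NULL return, and the waker/woken cleanup moves from bfq_exit_bfqq() into bfq_put_queue(), where woken-list entries are unlinked while the list is being walked. A minimal userspace sketch of that delete-during-iteration pattern, with illustrative names (the kernel uses hlist_for_each_entry_safe() for exactly this reason):

    #include <stddef.h>

    struct queue {
            struct queue *next;   /* link in the woken list */
            struct queue *waker;  /* back-pointer being torn down */
    };

    /* Unlink every entry while walking the list: grab the successor
     * before detaching the current node, since the node's own link is
     * no longer trustworthy after it is unhooked.
     */
    static void reset_wakers(struct queue **woken_list)
    {
            struct queue *item = *woken_list;

            while (item) {
                    struct queue *next = item->next; /* save first */

                    item->waker = NULL;  /* queue no longer has a waker */
                    item->next = NULL;   /* detach from the woken list */
                    item = next;
            }
            *woken_list = NULL;
    }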
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 3a36e76eca831db26715248b7665a674801aed28..9e9583a6bba99295601cc92399c94e85f623cef3 100644
@@ -338,6 +338,9 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port,
                hpriv->phys[port] = NULL;
                rc = 0;
                break;
+       case -EPROBE_DEFER:
+               /* Do not complain yet */
+               break;
 
        default:
                dev_err(dev,
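The new -EPROBE_DEFER case follows the standard deferred-probe convention: when a resource provider has not bound yet, propagate the error silently so the driver core retries the probe later, and reserve dev_err() for real failures. A hedged sketch of the pattern; the helper name and the "sata-phy" lookup string are illustrative, not from this driver:

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/errno.h>
    #include <linux/phy/phy.h>

    static int example_get_phy(struct device *dev, struct phy **out)
    {
            struct phy *phy = devm_phy_get(dev, "sata-phy");

            if (IS_ERR(phy)) {
                    int rc = PTR_ERR(phy);

                    /* Provider not ready: fail quietly, probe retries */
                    if (rc != -EPROBE_DEFER)
                            dev_err(dev, "failed to get PHY: %d\n", rc);
                    return rc;
            }

            *out = phy;
            return 0;
    }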
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 391ac0503dc075e7de0957c56aa7a30f966927e4..76d0f9de767bcec3d19b59a390cf95d9954c4025 100644
@@ -1786,6 +1786,21 @@ nothing_to_do:
        return 1;
 }
 
+static bool ata_check_nblocks(struct scsi_cmnd *scmd, u32 n_blocks)
+{
+       struct request *rq = scmd->request;
+       u32 req_blocks;
+
+       if (!blk_rq_is_passthrough(rq))
+               return true;
+
+       req_blocks = blk_rq_bytes(rq) / scmd->device->sector_size;
+       if (n_blocks > req_blocks)
+               return false;
+
+       return true;
+}
+
 /**
  *     ata_scsi_rw_xlat - Translate SCSI r/w command into an ATA one
  *     @qc: Storage for translated ATA taskfile
@@ -1830,6 +1845,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
                scsi_10_lba_len(cdb, &block, &n_block);
                if (cdb[1] & (1 << 3))
                        tf_flags |= ATA_TFLAG_FUA;
+               if (!ata_check_nblocks(scmd, n_block))
+                       goto invalid_fld;
                break;
        case READ_6:
        case WRITE_6:
@@ -1844,6 +1861,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
                 */
                if (!n_block)
                        n_block = 256;
+               if (!ata_check_nblocks(scmd, n_block))
+                       goto invalid_fld;
                break;
        case READ_16:
        case WRITE_16:
@@ -1854,6 +1873,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc)
                scsi_16_lba_len(cdb, &block, &n_block);
                if (cdb[1] & (1 << 3))
                        tf_flags |= ATA_TFLAG_FUA;
+               if (!ata_check_nblocks(scmd, n_block))
+                       goto invalid_fld;
                break;
        default:
                DPRINTK("no-byte command\n");
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 10aa2788214279b1682135e77fc22ac0007e6190..4f115adb4ee83b3836f1d596e1350ce3785011d6 100644
@@ -658,6 +658,10 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
        unsigned int offset;
        unsigned char *buf;
 
+       if (!qc->cursg) {
+               qc->curbytes = qc->nbytes;
+               return;
+       }
        if (qc->curbytes == qc->nbytes - qc->sect_size)
                ap->hsm_task_state = HSM_ST_LAST;
 
@@ -683,6 +687,8 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 
        if (qc->cursg_ofs == qc->cursg->length) {
                qc->cursg = sg_next(qc->cursg);
+               if (!qc->cursg)
+                       ap->hsm_task_state = HSM_ST_LAST;
                qc->cursg_ofs = 0;
        }
 }
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 7c37f2ff09e4169f30b42fbb12798abc19de228e..deae466395de1a656d5cba27720fb8f345e6c332 100644
@@ -158,7 +158,6 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
 static int rb532_pata_driver_remove(struct platform_device *pdev)
 {
        struct ata_host *ah = platform_get_drvdata(pdev);
-       struct rb532_cf_info *info = ah->private_data;
 
        ata_host_detach(ah);
 
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 5b49f1b33ebec4cff30ec71347c97f119041b351..e2ea2356da0610c6872724e0295e4f8335ded013 100644
@@ -323,10 +323,14 @@ flush(const char __user *str, size_t cnt, int exiting)
        }
 
        flush_scheduled_work();
-       /* pass one: without sleeping, do aoedev_downdev */
+       /* pass one: do aoedev_downdev, which might sleep */
+restart1:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
+               if (d->flags & DEVFL_TKILL)
+                       goto cont;
+
                if (exiting) {
                        /* unconditionally take each device down */
                } else if (specified) {
@@ -338,8 +342,11 @@ flush(const char __user *str, size_t cnt, int exiting)
                || d->ref)
                        goto cont;
 
+               spin_unlock(&d->lock);
+               spin_unlock_irqrestore(&devlist_lock, flags);
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
+               goto restart1;
 cont:
                spin_unlock(&d->lock);
        }
@@ -348,7 +355,7 @@ cont:
        /* pass two: call freedev, which might sleep,
         * for aoedevs marked with DEVFL_TKILL
         */
-restart:
+restart2:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
@@ -357,7 +364,7 @@ restart:
                        spin_unlock(&d->lock);
                        spin_unlock_irqrestore(&devlist_lock, flags);
                        freedev(d);
-                       goto restart;
+                       goto restart2;
                }
                spin_unlock(&d->lock);
        }
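aoedev_downdev() can sleep, so it can no longer be called with devlist_lock and d->lock held. The fix drops both locks around the call and restarts the scan from the top, with DEVFL_TKILL marking devices already handled so the loop converges. A simplified sketch of the drop-lock-and-restart pattern; pthread mutexes and the field names stand in for the kernel spinlocks and flags:

    #include <pthread.h>

    struct dev {
            struct dev *next;
            int killed;                 /* stands in for DEVFL_TKILL */
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    extern struct dev *devlist;
    extern void downdev(struct dev *d); /* may sleep; call unlocked */

    static void flush_all(void)
    {
    restart:
            pthread_mutex_lock(&list_lock);
            for (struct dev *d = devlist; d; d = d->next) {
                    if (d->killed)
                            continue;   /* already taken down, skip */
                    d->killed = 1;      /* guarantees forward progress */
                    pthread_mutex_unlock(&list_lock);
                    downdev(d);         /* safe to sleep here */
                    goto restart;       /* list may have changed */
            }
            pthread_mutex_unlock(&list_lock);
    }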
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 44c9985f352abd0cfb4ddda003302e09f76ce4bd..ab7ca5989097ac70372f77f4fb60964a98c98177 100644
@@ -885,7 +885,7 @@ static void loop_unprepare_queue(struct loop_device *lo)
 
 static int loop_kthread_worker_fn(void *worker_ptr)
 {
-       current->flags |= PF_LESS_THROTTLE;
+       current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
        return kthread_worker_fn(worker_ptr);
 }
 
@@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        struct file     *file;
        struct inode    *inode;
        struct address_space *mapping;
+       struct block_device *claimed_bdev = NULL;
        int             lo_flags = 0;
        int             error;
        loff_t          size;
@@ -942,10 +943,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
         * here to avoid changing device under exclusive owner.
         */
        if (!(mode & FMODE_EXCL)) {
-               bdgrab(bdev);
-               error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd);
-               if (error)
+               claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+               if (IS_ERR(claimed_bdev)) {
+                       error = PTR_ERR(claimed_bdev);
                        goto out_putf;
+               }
        }
 
        error = mutex_lock_killable(&loop_ctl_mutex);
@@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
        mutex_unlock(&loop_ctl_mutex);
        if (partscan)
                loop_reread_partitions(lo, bdev);
-       if (!(mode & FMODE_EXCL))
-               blkdev_put(bdev, mode | FMODE_EXCL);
+       if (claimed_bdev)
+               bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
        return 0;
 
 out_unlock:
        mutex_unlock(&loop_ctl_mutex);
 out_bdev:
-       if (!(mode & FMODE_EXCL))
-               blkdev_put(bdev, mode | FMODE_EXCL);
+       if (claimed_bdev)
+               bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
 out_putf:
        fput(file);
 out:
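loop_set_fd() only needs to hold off other exclusive openers while it configures the device; it never wants an exclusive open of its own. The new helpers make that explicit: bd_start_claiming() blocks other claimers, and bd_abort_claiming() releases the pending claim, rather than taking and immediately dropping a full blkdev_get()/blkdev_put() exclusive open, which has side effects visible to concurrent openers. The usage pattern reduced to its core (the wrapper function is illustrative; passing the function itself as the holder cookie mirrors what loop does with loop_set_fd):

    #include <linux/err.h>
    #include <linux/fs.h>

    static int configure_under_claim(struct block_device *bdev)
    {
            /* Claim without opening: only blocks exclusive openers. */
            struct block_device *claimed =
                    bd_start_claiming(bdev, configure_under_claim);

            if (IS_ERR(claimed))
                    return PTR_ERR(claimed);

            /* ... setup that must not race an exclusive open ... */

            /* No open was wanted: drop the pending claim. */
            bd_abort_claiming(bdev, claimed, configure_under_claim);
            return 0;
    }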
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9bcde2325893183167dcaed84f299af777d62a39..e21d2ded732b735c13f7ebbd02533ed081afe0ca 100644
@@ -1231,7 +1231,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
                                 struct block_device *bdev)
 {
        sock_shutdown(nbd);
-       kill_bdev(bdev);
+       __invalidate_device(bdev, true);
        nbd_bdev_reset(bdev);
        if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 9f0826712845050ab6ee3d54abe69980eb8d9216..e2059af9079181e9193795757b2ed5b448741f48 100644
@@ -23,24 +23,28 @@ static const char * const bch_cache_modes[] = {
        "writethrough",
        "writeback",
        "writearound",
-       "none"
+       "none",
+       NULL
 };
 
 /* Default is 0 ("auto") */
 static const char * const bch_stop_on_failure_modes[] = {
        "auto",
-       "always"
+       "always",
+       NULL
 };
 
 static const char * const cache_replacement_policies[] = {
        "lru",
        "fifo",
-       "random"
+       "random",
+       NULL
 };
 
 static const char * const error_actions[] = {
        "unregister",
-       "panic"
+       "panic",
+       NULL
 };
 
 write_attribute(attach);
@@ -338,7 +342,7 @@ STORE(__cached_dev)
        }
 
        if (attr == &sysfs_cache_mode) {
-               v = sysfs_match_string(bch_cache_modes, buf);
+               v = __sysfs_match_string(bch_cache_modes, -1, buf);
                if (v < 0)
                        return v;
 
@@ -349,7 +353,7 @@ STORE(__cached_dev)
        }
 
        if (attr == &sysfs_stop_when_cache_set_failed) {
-               v = sysfs_match_string(bch_stop_on_failure_modes, buf);
+               v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
                if (v < 0)
                        return v;
 
@@ -816,7 +820,7 @@ STORE(__bch_cache_set)
                            0, UINT_MAX);
 
        if (attr == &sysfs_errors) {
-               v = sysfs_match_string(error_actions, buf);
+               v = __sysfs_match_string(error_actions, -1, buf);
                if (v < 0)
                        return v;
 
@@ -1088,7 +1092,7 @@ STORE(__bch_cache)
        }
 
        if (attr == &sysfs_cache_replacement_policy) {
-               v = sysfs_match_string(cache_replacement_policies, buf);
+               v = __sysfs_match_string(cache_replacement_policies, -1, buf);
                if (v < 0)
                        return v;
 
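Each mode array gains a NULL sentinel and is matched with __sysfs_match_string(array, -1, buf), which walks entries until it hits the terminator instead of relying on a caller-supplied length; the sentinel also lets helpers that print these lists walk to the NULL end. A minimal sketch of the store-side pattern, with illustrative names:

    #include <linux/string.h>

    static const char * const modes[] = {
            "auto",
            "always",
            NULL            /* sentinel: terminates the walk */
    };

    /* Match a sysfs write against the list; n == -1 means "stop at NULL". */
    static ssize_t store_mode(const char *buf)
    {
            int v = __sysfs_match_string(modes, -1, buf);

            if (v < 0)
                    return v;       /* -EINVAL: no entry matched */
            /* v is the index of the matched mode */
            return 0;
    }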
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index b9ce93e9df89295eb72132fcfc81d0257aaa1723..99f86612f7751ad6d47b7abee8661e8deec2b0b2 100644
@@ -383,6 +383,20 @@ suborder_not_supported(struct dasd_ccw_req *cqr)
        char msg_format;
        char msg_no;
 
+       /*
+        * intrc values ENODEV, ENOLINK and EPERM
+        * will be obtained from sleep_on to indicate that no
+        * IO operation can be started
+        */
+       if (cqr->intrc == -ENODEV)
+               return 1;
+
+       if (cqr->intrc == -ENOLINK)
+               return 1;
+
+       if (cqr->intrc == -EPERM)
+               return 1;
+
        sense = dasd_get_sense(&cqr->irb);
        if (!sense)
                return 0;
@@ -447,12 +461,8 @@ static int read_unit_address_configuration(struct dasd_device *device,
        lcu->flags &= ~NEED_UAC_UPDATE;
        spin_unlock_irqrestore(&lcu->lock, flags);
 
-       do {
-               rc = dasd_sleep_on(cqr);
-               if (rc && suborder_not_supported(cqr))
-                       return -EOPNOTSUPP;
-       } while (rc && (cqr->retries > 0));
-       if (rc) {
+       rc = dasd_sleep_on(cqr);
+       if (rc && !suborder_not_supported(cqr)) {
                spin_lock_irqsave(&lcu->lock, flags);
                lcu->flags |= NEED_UAC_UPDATE;
                spin_unlock_irqrestore(&lcu->lock, flags);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2a85b587922d9eacf8c539d00ad2b7efff26a3f..eb657ab9406062cdd23136e66fc265e472523a80 100644
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        loff_t pos = iocb->ki_pos;
        blk_qc_t qc = BLK_QC_T_NONE;
        gfp_t gfp;
-       ssize_t ret;
+       int ret;
 
        if ((pos | iov_iter_alignment(iter)) &
            (bdev_logical_block_size(bdev) - 1))
@@ -386,8 +386,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
        ret = 0;
        for (;;) {
-               int err;
-
                bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector = pos >> 9;
                bio->bi_write_hint = iocb->ki_hint;
@@ -395,10 +393,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                bio->bi_end_io = blkdev_bio_end_io;
                bio->bi_ioprio = iocb->ki_ioprio;
 
-               err = bio_iov_iter_get_pages(bio, iter);
-               if (unlikely(err)) {
-                       if (!ret)
-                               ret = err;
+               ret = bio_iov_iter_get_pages(bio, iter);
+               if (unlikely(ret)) {
                        bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                        break;
@@ -421,7 +417,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                if (nowait)
                        bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
 
-               dio->size += bio->bi_iter.bi_size;
                pos += bio->bi_iter.bi_size;
 
                nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
@@ -433,10 +428,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                                polled = true;
                        }
 
+                       dio->size += bio->bi_iter.bi_size;
                        qc = submit_bio(bio);
                        if (qc == BLK_QC_T_EAGAIN) {
-                               if (!ret)
-                                       ret = -EAGAIN;
+                               dio->size -= bio->bi_iter.bi_size;
+                               ret = -EAGAIN;
                                goto error;
                        }
 
@@ -459,18 +455,17 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        atomic_inc(&dio->ref);
                }
 
+               dio->size += bio->bi_iter.bi_size;
                qc = submit_bio(bio);
                if (qc == BLK_QC_T_EAGAIN) {
-                       if (!ret)
-                               ret = -EAGAIN;
+                       dio->size -= bio->bi_iter.bi_size;
+                       ret = -EAGAIN;
                        goto error;
                }
-               ret += bio->bi_iter.bi_size;
 
                bio = bio_alloc(gfp, nr_pages);
                if (!bio) {
-                       if (!ret)
-                               ret = -EAGAIN;
+                       ret = -EAGAIN;
                        goto error;
                }
        }
@@ -495,6 +490,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 out:
        if (!ret)
                ret = blk_status_to_errno(dio->bio.bi_status);
+       if (likely(!ret))
+               ret = dio->size;
 
        bio_put(&dio->bio);
        return ret;
@@ -1181,8 +1178,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
  * Pointer to the block device containing @bdev on success, ERR_PTR()
  * value on failure.
  */
-static struct block_device *bd_start_claiming(struct block_device *bdev,
-                                             void *holder)
+struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
 {
        struct gendisk *disk;
        struct block_device *whole;
@@ -1229,6 +1225,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
                return ERR_PTR(err);
        }
 }
+EXPORT_SYMBOL(bd_start_claiming);
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+       lockdep_assert_held(&bdev_lock);
+       /* tell others that we're done */
+       BUG_ON(whole->bd_claiming != holder);
+       whole->bd_claiming = NULL;
+       wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exclusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
+                       void *holder)
+{
+       spin_lock(&bdev_lock);
+       BUG_ON(!bd_may_claim(bdev, whole, holder));
+       /*
+        * Note that for a whole device bd_holders will be incremented twice,
+        * and bd_holder will be set to bd_may_claim before being set to holder
+        */
+       whole->bd_holders++;
+       whole->bd_holder = bd_may_claim;
+       bdev->bd_holders++;
+       bdev->bd_holder = holder;
+       bd_clear_claiming(whole, holder);
+       spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_finish_claiming);
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can
+ * also be used when exclusive open is not actually desired and we just needed
+ * to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+                      void *holder)
+{
+       spin_lock(&bdev_lock);
+       bd_clear_claiming(whole, holder);
+       spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
 
 #ifdef CONFIG_SYSFS
 struct bd_holder_disk {
@@ -1698,29 +1750,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
-               spin_lock(&bdev_lock);
-
-               if (!res) {
-                       BUG_ON(!bd_may_claim(bdev, whole, holder));
-                       /*
-                        * Note that for a whole device bd_holders
-                        * will be incremented twice, and bd_holder
-                        * will be set to bd_may_claim before being
-                        * set to holder
-                        */
-                       whole->bd_holders++;
-                       whole->bd_holder = bd_may_claim;
-                       bdev->bd_holders++;
-                       bdev->bd_holder = holder;
-               }
-
-               /* tell others that we're done */
-               BUG_ON(whole->bd_claiming != holder);
-               whole->bd_claiming = NULL;
-               wake_up_bit(&whole->bd_claiming, 0);
-
-               spin_unlock(&bdev_lock);
-
+               if (!res)
+                       bd_finish_claiming(bdev, whole, holder);
+               else
+                       bd_abort_claiming(bdev, whole, holder);
                /*
                 * Block event polling for write claims if requested.  Any
                 * write holder makes the write_holder state stick until
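Two threads run through the __blkdev_direct_IO changes: ret shrinks from ssize_t to int and carries only an errno (the byte count now comes from dio->size once, at the end), and dio->size is charged just before each submit_bio() and refunded when the bio bounces with BLK_QC_T_EAGAIN, so a partially submitted NOWAIT request reports only the bytes actually queued. The invariant, in sketch form, using the names from the function above:

    /* Per bio: charge before submit, refund if the bio was not queued. */
    dio->size += bio->bi_iter.bi_size;
    qc = submit_bio(bio);
    if (qc == BLK_QC_T_EAGAIN) {
            dio->size -= bio->bi_iter.bi_size;
            ret = -EAGAIN;          /* errno only; no byte count here */
    }

    /* At completion: an errno wins, otherwise report the queued bytes. */
    if (!ret)
            ret = blk_status_to_errno(dio->bio.bi_status);
    if (likely(!ret))
            ret = dio->size;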
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 012bc0efb9d3cba3241c5fe71883106f5616b092..d542f1cf4428ed79af62c76273d32885490f72ff 100644
@@ -1838,6 +1838,7 @@ restart:
        do {
                struct sqe_submit *s = &req->submit;
                const struct io_uring_sqe *sqe = s->sqe;
+               unsigned int flags = req->flags;
 
                /* Ensure we clear previously set non-block flag */
                req->rw.ki_flags &= ~IOCB_NOWAIT;
@@ -1883,7 +1884,7 @@ restart:
                kfree(sqe);
 
                /* req from defer and link list needn't decrease async cnt */
-               if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
+               if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
                        goto out;
 
                if (!async_list)
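The io_uring hunk is a textbook use-after-free repair: req can be freed by the time the REQ_F_IO_DRAINED/REQ_F_LINK_DONE test runs, so the flag word is snapshotted into a local while req is still live and the local copy is tested afterwards. The shape of the fix; the helper name is illustrative:

    unsigned int flags = req->flags;    /* snapshot while req is valid */

    issue_and_maybe_free(req);          /* may free req internally */

    /* req must not be dereferenced past this point; test the copy */
    if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
            goto out;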
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56b8e358af5c1e6c6e02663a6722909e0a13d70d..997a530ff4e9d038d1e705c582833f8f6817a436 100644
@@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
                                               void *holder);
 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
                                              void *holder);
+extern struct block_device *bd_start_claiming(struct block_device *bdev,
+                                             void *holder);
+extern void bd_finish_claiming(struct block_device *bdev,
+                              struct block_device *whole, void *holder);
+extern void bd_abort_claiming(struct block_device *bdev,
+                             struct block_device *whole, void *holder);
 extern void blkdev_put(struct block_device *bdev, fmode_t mode);
 extern int __blkdev_reread_part(struct block_device *bdev);
 extern int blkdev_reread_part(struct block_device *bdev);