Merge tag 'vfs-5.4-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[linux-2.6-block.git] / fs / block_dev.c
index d9bab63a9b81839ac9d952a7784aecc0f668405e..9c073dbdc1b0435094d130bb3fad3c873824b300 100644 (file)
@@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        struct bio *bio;
        bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
        bool is_read = (iov_iter_rw(iter) == READ), is_sync;
-       bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
        loff_t pos = iocb->ki_pos;
        blk_qc_t qc = BLK_QC_T_NONE;
-       gfp_t gfp;
-       ssize_t ret;
+       int ret = 0;
 
        if ((pos | iov_iter_alignment(iter)) &
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
 
-       if (nowait)
-               gfp = GFP_NOWAIT;
-       else
-               gfp = GFP_KERNEL;
-
-       bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
-       if (!bio)
-               return -EAGAIN;
+       bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
 
        dio = container_of(bio, struct blkdev_dio, bio);
        dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -384,10 +375,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        if (!is_poll)
                blk_start_plug(&plug);
 
-       ret = 0;
        for (;;) {
-               int err;
-
                bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector = pos >> 9;
                bio->bi_write_hint = iocb->ki_hint;
@@ -395,10 +383,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                bio->bi_end_io = blkdev_bio_end_io;
                bio->bi_ioprio = iocb->ki_ioprio;
 
-               err = bio_iov_iter_get_pages(bio, iter);
-               if (unlikely(err)) {
-                       if (!ret)
-                               ret = err;
+               ret = bio_iov_iter_get_pages(bio, iter);
+               if (unlikely(ret)) {
                        bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                        break;
@@ -413,14 +399,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        task_io_account_write(bio->bi_iter.bi_size);
                }
 
-               /*
-                * Tell underlying layer to not block for resource shortage.
-                * And if we would have blocked, return error inline instead
-                * of through the bio->bi_end_io() callback.
-                */
-               if (nowait)
-                       bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
-
                dio->size += bio->bi_iter.bi_size;
                pos += bio->bi_iter.bi_size;
 
@@ -434,11 +412,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        }
 
                        qc = submit_bio(bio);
-                       if (qc == BLK_QC_T_EAGAIN) {
-                               if (!ret)
-                                       ret = -EAGAIN;
-                               goto error;
-                       }
 
                        if (polled)
                                WRITE_ONCE(iocb->ki_cookie, qc);
@@ -459,20 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        atomic_inc(&dio->ref);
                }
 
-               qc = submit_bio(bio);
-               if (qc == BLK_QC_T_EAGAIN) {
-                       if (!ret)
-                               ret = -EAGAIN;
-                       goto error;
-               }
-               ret += bio->bi_iter.bi_size;
-
-               bio = bio_alloc(gfp, nr_pages);
-               if (!bio) {
-                       if (!ret)
-                               ret = -EAGAIN;
-                       goto error;
-               }
+               submit_bio(bio);
+               bio = bio_alloc(GFP_KERNEL, nr_pages);
        }
 
        if (!is_poll)
@@ -492,16 +453,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
        }
        __set_current_state(TASK_RUNNING);
 
-out:
        if (!ret)
                ret = blk_status_to_errno(dio->bio.bi_status);
+       if (likely(!ret))
+               ret = dio->size;
 
        bio_put(&dio->bio);
        return ret;
-error:
-       if (!is_poll)
-               blk_finish_plug(&plug);
-       goto out;
 }
 
 static ssize_t
@@ -1181,8 +1139,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
  * Pointer to the block device containing @bdev on success, ERR_PTR()
  * value on failure.
  */
-static struct block_device *bd_start_claiming(struct block_device *bdev,
-                                             void *holder)
+struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
 {
        struct gendisk *disk;
        struct block_device *whole;
@@ -1229,6 +1186,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
                return ERR_PTR(err);
        }
 }
+EXPORT_SYMBOL(bd_start_claiming);
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+       lockdep_assert_held(&bdev_lock);
+       /* tell others that we're done */
+       BUG_ON(whole->bd_claiming != holder);
+       whole->bd_claiming = NULL;
+       wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exlusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
+                       void *holder)
+{
+       spin_lock(&bdev_lock);
+       BUG_ON(!bd_may_claim(bdev, whole, holder));
+       /*
+        * Note that for a whole device bd_holders will be incremented twice,
+        * and bd_holder will be set to bd_may_claim before being set to holder
+        */
+       whole->bd_holders++;
+       whole->bd_holder = bd_may_claim;
+       bdev->bd_holders++;
+       bdev->bd_holder = holder;
+       bd_clear_claiming(whole, holder);
+       spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_finish_claiming);
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can be
+ * also used when exclusive open is not actually desired and we just needed
+ * to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+                      void *holder)
+{
+       spin_lock(&bdev_lock);
+       bd_clear_claiming(whole, holder);
+       spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
 
 #ifdef CONFIG_SYSFS
 struct bd_holder_disk {
@@ -1698,29 +1711,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
-               spin_lock(&bdev_lock);
-
-               if (!res) {
-                       BUG_ON(!bd_may_claim(bdev, whole, holder));
-                       /*
-                        * Note that for a whole device bd_holders
-                        * will be incremented twice, and bd_holder
-                        * will be set to bd_may_claim before being
-                        * set to holder
-                        */
-                       whole->bd_holders++;
-                       whole->bd_holder = bd_may_claim;
-                       bdev->bd_holders++;
-                       bdev->bd_holder = holder;
-               }
-
-               /* tell others that we're done */
-               BUG_ON(whole->bd_claiming != holder);
-               whole->bd_claiming = NULL;
-               wake_up_bit(&whole->bd_claiming, 0);
-
-               spin_unlock(&bdev_lock);
-
+               if (!res)
+                       bd_finish_claiming(bdev, whole, holder);
+               else
+                       bd_abort_claiming(bdev, whole, holder);
                /*
                 * Block event polling for write claims if requested.  Any
                 * write holder makes the write_holder state stick until