Merge tag 'vfs-5.4-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 Sep 2019 00:35:20 +0000 (17:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 Sep 2019 00:35:20 +0000 (17:35 -0700)
Pull swap access updates from Darrick Wong:
 "Prohibit writing to active swap files and swap partitions.

  There's no non-malicious use case for allowing userspace to scribble
  on storage that the kernel thinks it owns"

* tag 'vfs-5.4-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  vfs: don't allow writes to swap files
  mm: set S_SWAPFILE on blockdev swap devices
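
The mechanism is a one-line guard: swapon() now tags the backing inode with
S_SWAPFILE, and write paths refuse such inodes with -ETXTBSY (the same error
returned for writes to a running executable). A hedged sketch of the pattern,
wrapped in a hypothetical helper for illustration; the blkdev_write_iter()
hunk below shows the block-device instance of this check verbatim:

	/* Reject writes to any inode the kernel has claimed for swap.
	 * S_SWAPFILE is set at swapon time and cleared again at swapoff.
	 */
	static inline int deny_write_to_swapfile(struct inode *inode)
	{
		if (IS_SWAPFILE(inode))		/* i_flags & S_SWAPFILE */
			return -ETXTBSY;
		return 0;
	}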

fs/block_dev.c
include/linux/fs.h

diff --combined fs/block_dev.c
index 677cb364d33f0e43e65d25349fdcdce651a6ca5e,d9bab63a9b81839ac9d952a7784aecc0f668405e..9c073dbdc1b0435094d130bb3fad3c873824b300
@@@ -345,15 -345,24 +345,15 @@@ __blkdev_direct_IO(struct kiocb *iocb, 
        struct bio *bio;
        bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
        bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 -      bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
        loff_t pos = iocb->ki_pos;
        blk_qc_t qc = BLK_QC_T_NONE;
 -      gfp_t gfp;
 -      ssize_t ret;
 +      int ret = 0;
  
        if ((pos | iov_iter_alignment(iter)) &
            (bdev_logical_block_size(bdev) - 1))
                return -EINVAL;
  
 -      if (nowait)
 -              gfp = GFP_NOWAIT;
 -      else
 -              gfp = GFP_KERNEL;
 -
 -      bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
 -      if (!bio)
 -              return -EAGAIN;
 +      bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
  
        dio = container_of(bio, struct blkdev_dio, bio);
        dio->is_sync = is_sync = is_sync_kiocb(iocb);
        if (!is_poll)
                blk_start_plug(&plug);
  
 -      ret = 0;
        for (;;) {
 -              int err;
 -
                bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector = pos >> 9;
                bio->bi_write_hint = iocb->ki_hint;
                bio->bi_end_io = blkdev_bio_end_io;
                bio->bi_ioprio = iocb->ki_ioprio;
  
 -              err = bio_iov_iter_get_pages(bio, iter);
 -              if (unlikely(err)) {
 -                      if (!ret)
 -                              ret = err;
 +              ret = bio_iov_iter_get_pages(bio, iter);
 +              if (unlikely(ret)) {
                        bio->bi_status = BLK_STS_IOERR;
                        bio_endio(bio);
                        break;
                        task_io_account_write(bio->bi_iter.bi_size);
                }
  
 -              /*
 -               * Tell underlying layer to not block for resource shortage.
 -               * And if we would have blocked, return error inline instead
 -               * of through the bio->bi_end_io() callback.
 -               */
 -              if (nowait)
 -                      bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
 -
                dio->size += bio->bi_iter.bi_size;
                pos += bio->bi_iter.bi_size;
  
                        }
  
                        qc = submit_bio(bio);
 -                      if (qc == BLK_QC_T_EAGAIN) {
 -                              if (!ret)
 -                                      ret = -EAGAIN;
 -                              goto error;
 -                      }
  
                        if (polled)
                                WRITE_ONCE(iocb->ki_cookie, qc);
                        atomic_inc(&dio->ref);
                }
  
 -              qc = submit_bio(bio);
 -              if (qc == BLK_QC_T_EAGAIN) {
 -                      if (!ret)
 -                              ret = -EAGAIN;
 -                      goto error;
 -              }
 -              ret += bio->bi_iter.bi_size;
 -
 -              bio = bio_alloc(gfp, nr_pages);
 -              if (!bio) {
 -                      if (!ret)
 -                              ret = -EAGAIN;
 -                      goto error;
 -              }
 +              submit_bio(bio);
 +              bio = bio_alloc(GFP_KERNEL, nr_pages);
        }
  
        if (!is_poll)
        }
        __set_current_state(TASK_RUNNING);
  
 -out:
        if (!ret)
                ret = blk_status_to_errno(dio->bio.bi_status);
 +      if (likely(!ret))
 +              ret = dio->size;
  
        bio_put(&dio->bio);
        return ret;
 -error:
 -      if (!is_poll)
 -              blk_finish_plug(&plug);
 -      goto out;
  }
  
  static ssize_t
@@@ -1139,7 -1181,8 +1139,7 @@@ static struct gendisk *bdev_get_gendisk
   * Pointer to the block device containing @bdev on success, ERR_PTR()
   * value on failure.
   */
 -static struct block_device *bd_start_claiming(struct block_device *bdev,
 -                                            void *holder)
 +struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
  {
        struct gendisk *disk;
        struct block_device *whole;
                return ERR_PTR(err);
        }
  }
 +EXPORT_SYMBOL(bd_start_claiming);
 +
 +static void bd_clear_claiming(struct block_device *whole, void *holder)
 +{
 +      lockdep_assert_held(&bdev_lock);
 +      /* tell others that we're done */
 +      BUG_ON(whole->bd_claiming != holder);
 +      whole->bd_claiming = NULL;
 +      wake_up_bit(&whole->bd_claiming, 0);
 +}
 +
 +/**
 + * bd_finish_claiming - finish claiming of a block device
 + * @bdev: block device of interest
 + * @whole: whole block device (returned from bd_start_claiming())
 + * @holder: holder that has claimed @bdev
 + *
 + * Finish exclusive open of a block device. Mark the device as exclusively
 + * open by the holder and wake up all waiters for exclusive open to finish.
 + */
 +void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
 +                      void *holder)
 +{
 +      spin_lock(&bdev_lock);
 +      BUG_ON(!bd_may_claim(bdev, whole, holder));
 +      /*
 +       * Note that for a whole device bd_holders will be incremented twice,
 +       * and bd_holder will be set to bd_may_claim before being set to holder
 +       */
 +      whole->bd_holders++;
 +      whole->bd_holder = bd_may_claim;
 +      bdev->bd_holders++;
 +      bdev->bd_holder = holder;
 +      bd_clear_claiming(whole, holder);
 +      spin_unlock(&bdev_lock);
 +}
 +EXPORT_SYMBOL(bd_finish_claiming);
 +
 +/**
 + * bd_abort_claiming - abort claiming of a block device
 + * @bdev: block device of interest
 + * @whole: whole block device (returned from bd_start_claiming())
 + * @holder: holder that has claimed @bdev
 + *
 + * Abort claiming of a block device when the exclusive open failed. This can
 + * also be used when exclusive open is not actually desired and we just needed
 + * to block other exclusive openers for a while.
 + */
 +void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
 +                     void *holder)
 +{
 +      spin_lock(&bdev_lock);
 +      bd_clear_claiming(whole, holder);
 +      spin_unlock(&bdev_lock);
 +}
 +EXPORT_SYMBOL(bd_abort_claiming);
  
  #ifdef CONFIG_SYSFS
  struct bd_holder_disk {
@@@ -1711,10 -1698,29 +1711,10 @@@ int blkdev_get(struct block_device *bde
  
                /* finish claiming */
                mutex_lock(&bdev->bd_mutex);
 -              spin_lock(&bdev_lock);
 -
 -              if (!res) {
 -                      BUG_ON(!bd_may_claim(bdev, whole, holder));
 -                      /*
 -                       * Note that for a whole device bd_holders
 -                       * will be incremented twice, and bd_holder
 -                       * will be set to bd_may_claim before being
 -                       * set to holder
 -                       */
 -                      whole->bd_holders++;
 -                      whole->bd_holder = bd_may_claim;
 -                      bdev->bd_holders++;
 -                      bdev->bd_holder = holder;
 -              }
 -
 -              /* tell others that we're done */
 -              BUG_ON(whole->bd_claiming != holder);
 -              whole->bd_claiming = NULL;
 -              wake_up_bit(&whole->bd_claiming, 0);
 -
 -              spin_unlock(&bdev_lock);
 -
 +              if (!res)
 +                      bd_finish_claiming(bdev, whole, holder);
 +              else
 +                      bd_abort_claiming(bdev, whole, holder);
                /*
                 * Block event polling for write claims if requested.  Any
                 * write holder makes the write_holder state stick until
@@@ -1972,6 -1978,9 +1972,9 @@@ ssize_t blkdev_write_iter(struct kiocb 
        if (bdev_read_only(I_BDEV(bd_inode)))
                return -EPERM;
  
+       if (IS_SWAPFILE(bd_inode))
+               return -ETXTBSY;
        if (!iov_iter_count(from))
                return 0;
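
The claiming helpers factored out above (bd_start_claiming(),
bd_finish_claiming(), bd_abort_claiming()) are now exported, so code outside
fs/block_dev.c can hold off other exclusive openers the same way blkdev_get()
does in the hunk above. A hedged usage sketch; example_claim_and_setup() and
do_exclusive_setup() are hypothetical names standing in for a real caller and
for whatever work must happen while the claim is pending:

	static int example_claim_and_setup(struct block_device *bdev, void *holder)
	{
		struct block_device *whole;
		int err;

		/* Blocks other exclusive openers of bdev and its whole device. */
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole))
			return PTR_ERR(whole);

		err = do_exclusive_setup(bdev);	/* hypothetical setup step */
		if (!err)
			bd_finish_claiming(bdev, whole, holder); /* become the holder */
		else
			bd_abort_claiming(bdev, whole, holder);	 /* wake waiters, drop claim */
		return err;
	}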
  
diff --combined include/linux/fs.h
index 104a727f8a67373a3b473d7c99ae8000b9f1cd4a,a2e3d446ba8ef1fb4605891fe1df60ca64fdcded..ae6648145d18536dea83454e354a23fafed32f51
@@@ -64,8 -64,6 +64,8 @@@ struct workqueue_struct
  struct iov_iter;
  struct fscrypt_info;
  struct fscrypt_operations;
 +struct fsverity_info;
 +struct fsverity_operations;
  struct fs_context;
  struct fs_parameter_description;
  
@@@ -725,10 -723,6 +725,10 @@@ struct inode 
        struct fscrypt_info     *i_crypt_info;
  #endif
  
 +#ifdef CONFIG_FS_VERITY
 +      struct fsverity_info    *i_verity_info;
 +#endif
 +
        void                    *i_private; /* fs or device private pointer */
  } __randomize_layout;
  
@@@ -1433,10 -1427,6 +1433,10 @@@ struct super_block 
        const struct xattr_handler **s_xattr;
  #ifdef CONFIG_FS_ENCRYPTION
        const struct fscrypt_operations *s_cop;
 +      struct key              *s_master_keys; /* master crypto keys in use */
 +#endif
 +#ifdef CONFIG_FS_VERITY
 +      const struct fsverity_operations *s_vop;
  #endif
        struct hlist_bl_head    s_roots;        /* alternate root dentries for NFS */
        struct list_head        s_mounts;       /* list of mounts; _not_ for fs use */
@@@ -1975,7 -1965,6 +1975,7 @@@ struct super_operations 
  #endif
  #define S_ENCRYPTED   16384   /* Encrypted file (using fs/crypto/) */
  #define S_CASEFOLD    32768   /* Casefolded file */
 +#define S_VERITY      65536   /* Verity file (using fs/verity/) */
  
  /*
   * Note that nosuid etc flags are inode-specific: setting some file-system
@@@ -2017,7 -2006,6 +2017,7 @@@ static inline bool sb_rdonly(const stru
  #define IS_DAX(inode)         ((inode)->i_flags & S_DAX)
  #define IS_ENCRYPTED(inode)   ((inode)->i_flags & S_ENCRYPTED)
  #define IS_CASEFOLDED(inode)  ((inode)->i_flags & S_CASEFOLD)
 +#define IS_VERITY(inode)      ((inode)->i_flags & S_VERITY)
  
  #define IS_WHITEOUT(inode)    (S_ISCHR(inode->i_mode) && \
                                 (inode)->i_rdev == WHITEOUT_DEV)
@@@ -2610,12 -2598,6 +2610,12 @@@ extern struct block_device *blkdev_get_
                                               void *holder);
  extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
                                              void *holder);
 +extern struct block_device *bd_start_claiming(struct block_device *bdev,
 +                                            void *holder);
 +extern void bd_finish_claiming(struct block_device *bdev,
 +                             struct block_device *whole, void *holder);
 +extern void bd_abort_claiming(struct block_device *bdev,
 +                            struct block_device *whole, void *holder);
  extern void blkdev_put(struct block_device *bdev, fmode_t mode);
  extern int __blkdev_reread_part(struct block_device *bdev);
  extern int blkdev_reread_part(struct block_device *bdev);
@@@ -3565,4 -3547,15 +3565,15 @@@ static inline void simple_fill_fsxattr(
        fa->fsx_xflags = xflags;
  }
  
+ /*
+  * Flush file data before changing attributes.  Caller must hold any locks
+  * required to prevent further writes to this file until we're done setting
+  * flags.
+  */
+ static inline int inode_drain_writes(struct inode *inode)
+ {
+       inode_dio_wait(inode);
+       return filemap_write_and_wait(inode->i_mapping);
+ }
  #endif /* _LINUX_FS_H */
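
The new inode_drain_writes() helper exists so that attribute-changing paths
(for example, a filesystem's SETFLAGS-style ioctl adding an immutable or
swap-related flag) can quiesce both direct and buffered writers before
flipping i_flags. A hedged sketch of a caller; example_set_immutable() is a
hypothetical function, and the locking shown is illustrative only:

	static int example_set_immutable(struct inode *inode)
	{
		int ret;

		inode_lock(inode);
		ret = inode_drain_writes(inode); /* wait out DIO, flush dirty pages */
		if (!ret) {
			inode->i_flags |= S_IMMUTABLE;
			mark_inode_dirty(inode);
		}
		inode_unlock(inode);
		return ret;
	}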