Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Jun 2014 17:30:18 +0000 (10:30 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Jun 2014 17:30:18 +0000 (10:30 -0700)
Pull vfs updates from Al Viro:
 "This is the bunch that sat in -next + lock_parent() fix.  This is the
  minimal set; there's more pending stuff.

  In particular, I really hope to get acct.c fixes merged this cycle -
  we need that to deal sanely with delayed-mntput stuff.  In the next
  pile, hopefully - that series is fairly short and localized
  (kernel/acct.c, fs/super.c and fs/namespace.c).  In this pile: more
  iov_iter work.  Most of prereqs for ->splice_write with sane locking
  order are there and Kent's dio rewrite would also fit nicely on top of
  this pile"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (70 commits)
  lock_parent: don't step on stale ->d_parent of all-but-freed one
  kill generic_file_splice_write()
  ceph: switch to iter_file_splice_write()
  shmem: switch to iter_file_splice_write()
  nfs: switch to iter_splice_write_file()
  fs/splice.c: remove unneeded exports
  ocfs2: switch to iter_file_splice_write()
  ->splice_write() via ->write_iter()
  bio_vec-backed iov_iter
  optimize copy_page_{to,from}_iter()
  bury generic_file_aio_{read,write}
  lustre: get rid of messing with iovecs
  ceph: switch to ->write_iter()
  ceph_sync_direct_write: stop poking into iov_iter guts
  ceph_sync_read: stop poking into iov_iter guts
  new helper: copy_page_from_iter()
  fuse: switch to ->write_iter()
  btrfs: switch to ->write_iter()
  ocfs2: switch to ->write_iter()
  xfs: switch to ->write_iter()
  ...

102 files changed:
Documentation/filesystems/Locking
Documentation/filesystems/vfs.txt
drivers/char/raw.c
drivers/mtd/nand/nandsim.c
drivers/staging/lustre/lustre/include/lclient.h
drivers/staging/lustre/lustre/lclient/lcommon_cl.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/rw.c
drivers/staging/lustre/lustre/llite/rw26.c
drivers/staging/lustre/lustre/llite/vvp_io.c
drivers/usb/gadget/storage_common.c
fs/9p/vfs_addr.c
fs/9p/vfs_file.c
fs/adfs/file.c
fs/affs/file.c
fs/afs/file.c
fs/afs/internal.h
fs/afs/write.c
fs/aio.c
fs/bfs/file.c
fs/block_dev.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/ceph/addr.c
fs/ceph/file.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/file.c
fs/dcache.c
fs/direct-io.c
fs/ecryptfs/file.c
fs/exofs/file.c
fs/exofs/inode.c
fs/ext2/file.c
fs/ext2/inode.c
fs/ext3/file.c
fs/ext3/inode.c
fs/ext4/ext4.h
fs/ext4/file.c
fs/ext4/indirect.c
fs/ext4/inode.c
fs/f2fs/data.c
fs/f2fs/file.c
fs/fat/file.c
fs/fat/inode.c
fs/file.c
fs/file_table.c
fs/fuse/cuse.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/gfs2/aops.c
fs/gfs2/file.c
fs/hfs/inode.c
fs/hfsplus/inode.c
fs/hostfs/hostfs_kern.c
fs/hpfs/file.c
fs/jffs2/file.c
fs/jfs/file.c
fs/jfs/inode.c
fs/logfs/file.c
fs/minix/file.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/internal.h
fs/nfs/nfs4file.c
fs/nilfs2/file.c
fs/nilfs2/inode.c
fs/ntfs/file.c
fs/ocfs2/aops.c
fs/ocfs2/file.c
fs/omfs/file.c
fs/open.c
fs/pipe.c
fs/ramfs/file-mmu.c
fs/ramfs/file-nommu.c
fs/read_write.c
fs/reiserfs/file.c
fs/reiserfs/inode.c
fs/romfs/mmap-nommu.c
fs/splice.c
fs/sysv/file.c
fs/ubifs/file.c
fs/udf/file.c
fs/udf/inode.c
fs/ufs/file.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_file.c
fs/xfs/xfs_trace.h
include/linux/blk_types.h
include/linux/ceph/libceph.h
include/linux/fs.h
include/linux/nfs_fs.h
include/linux/splice.h
include/linux/uio.h
mm/filemap.c
mm/iov_iter.c
mm/page_io.c
mm/process_vm_access.c
mm/shmem.c
mm/vmscan.c
net/ceph/pagevec.c

index eba7901342531d2dc089c9a39d990aa924b86526..b18dd17790299de1b01053117a065af1ed7f251f 100644 (file)
@@ -196,8 +196,7 @@ prototypes:
        void (*invalidatepage) (struct page *, unsigned int, unsigned int);
        int (*releasepage) (struct page *, int);
        void (*freepage)(struct page *);
-       int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs);
+       int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
        int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
                                unsigned long *);
        int (*migratepage)(struct address_space *, struct page *, struct page *);
@@ -431,6 +430,8 @@ prototypes:
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
        ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
        ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+       ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+       ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        unsigned int (*poll) (struct file *, struct poll_table_struct *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
index 617f6d70c0778ce37716d25fde6f0c158f492707..a1d0d7a301657d674c653648534ee5be527919b9 100644 (file)
@@ -589,8 +589,7 @@ struct address_space_operations {
        void (*invalidatepage) (struct page *, unsigned int, unsigned int);
        int (*releasepage) (struct page *, int);
        void (*freepage)(struct page *);
-       ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs);
+       ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
        struct page* (*get_xip_page)(struct address_space *, sector_t,
                        int);
        /* migrate the contents of a page to the specified target */
@@ -807,6 +806,8 @@ struct file_operations {
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
        ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
        ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+       ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+       ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        unsigned int (*poll) (struct file *, struct poll_table_struct *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -837,11 +838,15 @@ otherwise noted.
 
   read: called by read(2) and related system calls
 
-  aio_read: called by io_submit(2) and other asynchronous I/O operations
+  aio_read: vectored, possibly asynchronous read
+
+  read_iter: possibly asynchronous read with iov_iter as destination
 
   write: called by write(2) and related system calls
 
-  aio_write: called by io_submit(2) and other asynchronous I/O operations
+  aio_write: vectored, possibly asynchronous write
+
+  write_iter: possibly asynchronous write with iov_iter as source
 
   iterate: called when the VFS needs to read the directory contents
 
index 6e8d65e9b1d3c196ea2d2bd76b78530dd0387920..0102dc788608ec0060cf2fe1666e058c2d2d8076 100644 (file)
@@ -284,10 +284,10 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
 #endif
 
 static const struct file_operations raw_fops = {
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = blkdev_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = blkdev_write_iter,
        .fsync          = blkdev_fsync,
        .open           = raw_open,
        .release        = raw_release,
index 42e8a770e631c6eb4f98658cd9cef88d08ca1350..4f0d83648e5a5ad9f5f0260e21b01067f81099be 100644 (file)
@@ -575,12 +575,12 @@ static int alloc_device(struct nandsim *ns)
                cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
                if (IS_ERR(cfile))
                        return PTR_ERR(cfile);
-               if (!cfile->f_op->read && !cfile->f_op->aio_read) {
+               if (!(cfile->f_mode & FMODE_CAN_READ)) {
                        NS_ERR("alloc_device: cache file not readable\n");
                        err = -EINVAL;
                        goto err_close;
                }
-               if (!cfile->f_op->write && !cfile->f_op->aio_write) {
+               if (!(cfile->f_mode & FMODE_CAN_WRITE)) {
                        NS_ERR("alloc_device: cache file not writeable\n");
                        err = -EINVAL;
                        goto err_close;
index 827209ea6bd0e9685344b845f44b7560fdf5e1fe..386a36c00f572f871bd668f1fa5873a098714e73 100644 (file)
@@ -82,16 +82,7 @@ struct ccc_io {
        /**
         * I/O vector information to or from which read/write is going.
         */
-       struct iovec *cui_iov;
-       unsigned long cui_nrsegs;
-       /**
-        * Total iov count for left IO.
-        */
-       unsigned long cui_tot_nrsegs;
-       /**
-        * Old length for iov that was truncated partially.
-        */
-       size_t cui_iov_olen;
+       struct iov_iter *cui_iter;
        /**
         * Total size for the left IO.
         */
index dc24cfa5803722dd86669102189c27d793b83ecf..1b0c216bc5687742198c89c8d6f21b07036a11bb 100644 (file)
@@ -720,31 +720,12 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
 void ccc_io_update_iov(const struct lu_env *env,
                       struct ccc_io *cio, struct cl_io *io)
 {
-       int i;
        size_t size = io->u.ci_rw.crw_count;
 
-       cio->cui_iov_olen = 0;
-       if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
+       if (!cl_is_normalio(env, io) || cio->cui_iter == NULL)
                return;
 
-       for (i = 0; i < cio->cui_tot_nrsegs; i++) {
-               struct iovec *iv = &cio->cui_iov[i];
-
-               if (iv->iov_len < size)
-                       size -= iv->iov_len;
-               else {
-                       if (iv->iov_len > size) {
-                               cio->cui_iov_olen = iv->iov_len;
-                               iv->iov_len = size;
-                       }
-                       break;
-               }
-       }
-
-       cio->cui_nrsegs = i + 1;
-       LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
-                "tot_nrsegs: %lu, nrsegs: %lu\n",
-                cio->cui_tot_nrsegs, cio->cui_nrsegs);
+       iov_iter_truncate(cio->cui_iter, size);
 }
 
 int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
@@ -775,30 +756,7 @@ void ccc_io_advance(const struct lu_env *env,
        if (!cl_is_normalio(env, io))
                return;
 
-       LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
-       LASSERT(cio->cui_tot_count  >= nob);
-
-       cio->cui_iov    += cio->cui_nrsegs;
-       cio->cui_tot_nrsegs -= cio->cui_nrsegs;
-       cio->cui_tot_count  -= nob;
-
-       /* update the iov */
-       if (cio->cui_iov_olen > 0) {
-               struct iovec *iv;
-
-               cio->cui_iov--;
-               cio->cui_tot_nrsegs++;
-               iv = &cio->cui_iov[0];
-               if (io->ci_continue) {
-                       iv->iov_base += iv->iov_len;
-                       LASSERT(cio->cui_iov_olen > iv->iov_len);
-                       iv->iov_len = cio->cui_iov_olen - iv->iov_len;
-               } else {
-                       /* restore the iov_len, in case of restart io. */
-                       iv->iov_len = cio->cui_iov_olen;
-               }
-               cio->cui_iov_olen = 0;
-       }
+       iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
 }
 
 /**
index c4ddec2b3589eb743475f022c29d47ce49dbbec8..716e1ee0104f6fe0c2c1323689f216acff807bdc 100644 (file)
@@ -1114,9 +1114,7 @@ restart:
 
                switch (vio->cui_io_subtype) {
                case IO_NORMAL:
-                       cio->cui_iov = args->u.normal.via_iov;
-                       cio->cui_nrsegs = args->u.normal.via_nrsegs;
-                       cio->cui_tot_nrsegs = cio->cui_nrsegs;
+                       cio->cui_iter = args->u.normal.via_iter;
                        cio->cui_iocb = args->u.normal.via_iocb;
                        if ((iot == CIT_WRITE) &&
                            !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1180,58 +1178,23 @@ out:
        return result;
 }
 
-static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct lu_env      *env;
        struct vvp_io_args *args;
-       size_t        count = 0;
        ssize_t      result;
        int              refcheck;
 
-       result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (result)
-               return result;
-
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);
 
        args = vvp_env_args(env, IO_NORMAL);
-       args->u.normal.via_iov = (struct iovec *)iov;
-       args->u.normal.via_nrsegs = nr_segs;
+       args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
 
        result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
-                                   &iocb->ki_pos, count);
-       cl_env_put(env, &refcheck);
-       return result;
-}
-
-static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-                           loff_t *ppos)
-{
-       struct lu_env *env;
-       struct iovec  *local_iov;
-       struct kiocb  *kiocb;
-       ssize_t result;
-       int         refcheck;
-
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               return PTR_ERR(env);
-
-       local_iov = &vvp_env_info(env)->vti_local_iov;
-       kiocb = &vvp_env_info(env)->vti_kiocb;
-       local_iov->iov_base = (void __user *)buf;
-       local_iov->iov_len = count;
-       init_sync_kiocb(kiocb, file);
-       kiocb->ki_pos = *ppos;
-       kiocb->ki_nbytes = count;
-
-       result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
-       *ppos = kiocb->ki_pos;
-
+                                   &iocb->ki_pos, iov_iter_count(to));
        cl_env_put(env, &refcheck);
        return result;
 }
@@ -1239,64 +1202,27 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct lu_env      *env;
        struct vvp_io_args *args;
-       size_t        count = 0;
        ssize_t      result;
        int              refcheck;
 
-       result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-       if (result)
-               return result;
-
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);
 
        args = vvp_env_args(env, IO_NORMAL);
-       args->u.normal.via_iov = (struct iovec *)iov;
-       args->u.normal.via_nrsegs = nr_segs;
+       args->u.normal.via_iter = from;
        args->u.normal.via_iocb = iocb;
 
        result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
-                                 &iocb->ki_pos, count);
+                                 &iocb->ki_pos, iov_iter_count(from));
        cl_env_put(env, &refcheck);
        return result;
 }
 
-static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-                            loff_t *ppos)
-{
-       struct lu_env *env;
-       struct iovec  *local_iov;
-       struct kiocb  *kiocb;
-       ssize_t result;
-       int         refcheck;
-
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               return PTR_ERR(env);
-
-       local_iov = &vvp_env_info(env)->vti_local_iov;
-       kiocb = &vvp_env_info(env)->vti_kiocb;
-       local_iov->iov_base = (void __user *)buf;
-       local_iov->iov_len = count;
-       init_sync_kiocb(kiocb, file);
-       kiocb->ki_pos = *ppos;
-       kiocb->ki_nbytes = count;
-
-       result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
-       *ppos = kiocb->ki_pos;
-
-       cl_env_put(env, &refcheck);
-       return result;
-}
-
-
-
 /*
  * Send file content (through pagecache) somewhere with helper
  */
@@ -3143,10 +3069,10 @@ int ll_inode_permission(struct inode *inode, int mask)
 
 /* -o localflock - only provides locally consistent flock locks */
 struct file_operations ll_file_operations = {
-       .read      = ll_file_read,
-       .aio_read = ll_file_aio_read,
-       .write    = ll_file_write,
-       .aio_write = ll_file_aio_write,
+       .read      = new_sync_read,
+       .read_iter = ll_file_read_iter,
+       .write    = new_sync_write,
+       .write_iter = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
        .release        = ll_file_release,
@@ -3158,10 +3084,10 @@ struct file_operations ll_file_operations = {
 };
 
 struct file_operations ll_file_operations_flock = {
-       .read      = ll_file_read,
-       .aio_read    = ll_file_aio_read,
-       .write    = ll_file_write,
-       .aio_write   = ll_file_aio_write,
+       .read      = new_sync_read,
+       .read_iter    = ll_file_read_iter,
+       .write    = new_sync_write,
+       .write_iter   = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
        .release        = ll_file_release,
@@ -3176,10 +3102,10 @@ struct file_operations ll_file_operations_flock = {
 
 /* These are for -o noflock - to return ENOSYS on flock calls */
 struct file_operations ll_file_operations_noflock = {
-       .read      = ll_file_read,
-       .aio_read    = ll_file_aio_read,
-       .write    = ll_file_write,
-       .aio_write   = ll_file_aio_write,
+       .read      = new_sync_read,
+       .read_iter    = ll_file_read_iter,
+       .write    = new_sync_write,
+       .write_iter   = ll_file_write_iter,
        .unlocked_ioctl = ll_file_ioctl,
        .open      = ll_file_open,
        .release        = ll_file_release,
index dde7632ba01fa8dab671fb43995723bba1f5d3dd..140ee947ba4949ea547ac03ebaf9a9efb9e51ab1 100644 (file)
@@ -917,8 +917,7 @@ struct vvp_io_args {
        union {
                struct {
                        struct kiocb      *via_iocb;
-                       struct iovec      *via_iov;
-                       unsigned long      via_nrsegs;
+                       struct iov_iter   *via_iter;
                } normal;
                struct {
                        struct pipe_inode_info  *via_pipe;
index f0122c568a099fbb2ea519ddc4bca5d97a34495a..56162103cc79c2dad9038abb11d715c6e9909f45 100644 (file)
@@ -151,8 +151,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
                result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
                if (result == 0) {
                        cio->cui_fd = LUSTRE_FPRIVATE(file);
-                       cio->cui_iov = NULL;
-                       cio->cui_nrsegs = 0;
+                       cio->cui_iter = NULL;
                        result = cl_io_iter_init(env, io);
                        if (result == 0) {
                                result = cl_io_lock(env, io);
index 55ca8d3c3e46451b654acdc298333035fcb11e4b..af84c1aaa5f83f6c994a64da4b164535cbe54ac0 100644 (file)
@@ -218,14 +218,11 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
        int i;
 
        for (i = 0; i < npages; i++) {
-               if (pages[i] == NULL)
-                       break;
                if (do_dirty)
                        set_page_dirty_lock(pages[i]);
                page_cache_release(pages[i]);
        }
-
-       OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+       kvfree(pages);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -363,18 +360,16 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
                      ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-                              const struct iovec *iov, loff_t file_offset,
-                              unsigned long nr_segs)
+                              struct iov_iter *iter, loff_t file_offset)
 {
        struct lu_env *env;
        struct cl_io *io;
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct ccc_object *obj = cl_inode2ccc(inode);
-       long count = iov_length(iov, nr_segs);
-       long tot_bytes = 0, result = 0;
+       ssize_t count = iov_iter_count(iter);
+       ssize_t tot_bytes = 0, result = 0;
        struct ll_inode_info *lli = ll_i2info(inode);
-       unsigned long seg = 0;
        long size = MAX_DIO_SIZE;
        int refcheck;
 
@@ -392,11 +387,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
               MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
        /* Check that all user buffers are aligned as well */
-       for (seg = 0; seg < nr_segs; seg++) {
-               if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-                   (iov[seg].iov_len & ~CFS_PAGE_MASK))
-                       return -EINVAL;
-       }
+       if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+               return -EINVAL;
 
        env = cl_env_get(&refcheck);
        LASSERT(!IS_ERR(env));
@@ -411,63 +403,49 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                mutex_lock(&inode->i_mutex);
 
        LASSERT(obj->cob_transient_pages == 0);
-       for (seg = 0; seg < nr_segs; seg++) {
-               long iov_left = iov[seg].iov_len;
-               unsigned long user_addr = (unsigned long)iov[seg].iov_base;
+       while (iov_iter_count(iter)) {
+               struct page **pages;
+               size_t offs;
 
+               count = min_t(size_t, iov_iter_count(iter), size);
                if (rw == READ) {
                        if (file_offset >= i_size_read(inode))
                                break;
-                       if (file_offset + iov_left > i_size_read(inode))
-                               iov_left = i_size_read(inode) - file_offset;
+                       if (file_offset + count > i_size_read(inode))
+                               count = i_size_read(inode) - file_offset;
                }
 
-               while (iov_left > 0) {
-                       struct page **pages;
-                       int page_count, max_pages = 0;
-                       long bytes;
-
-                       bytes = min(size, iov_left);
-                       page_count = ll_get_user_pages(rw, user_addr, bytes,
-                                                      &pages, &max_pages);
-                       if (likely(page_count > 0)) {
-                               if (unlikely(page_count <  max_pages))
-                                       bytes = page_count << PAGE_CACHE_SHIFT;
-                               result = ll_direct_IO_26_seg(env, io, rw, inode,
-                                                            file->f_mapping,
-                                                            bytes, file_offset,
-                                                            pages, page_count);
-                               ll_free_user_pages(pages, max_pages, rw==READ);
-                       } else if (page_count == 0) {
-                               GOTO(out, result = -EFAULT);
-                       } else {
-                               result = page_count;
-                       }
-                       if (unlikely(result <= 0)) {
-                               /* If we can't allocate a large enough buffer
-                                * for the request, shrink it to a smaller
-                                * PAGE_SIZE multiple and try again.
-                                * We should always be able to kmalloc for a
-                                * page worth of page pointers = 4MB on i386. */
-                               if (result == -ENOMEM &&
-                                   size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-                                          PAGE_CACHE_SIZE) {
-                                       size = ((((size / 2) - 1) |
-                                                ~CFS_PAGE_MASK) + 1) &
-                                               CFS_PAGE_MASK;
-                                       CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
-                                              size);
-                                       continue;
-                               }
-
-                               GOTO(out, result);
+               result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+               if (likely(result > 0)) {
+                       int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
+                       result = ll_direct_IO_26_seg(env, io, rw, inode,
+                                                    file->f_mapping,
+                                                    result, file_offset,
+                                                    pages, n);
+                       ll_free_user_pages(pages, n, rw==READ);
+               }
+               if (unlikely(result <= 0)) {
+                       /* If we can't allocate a large enough buffer
+                        * for the request, shrink it to a smaller
+                        * PAGE_SIZE multiple and try again.
+                        * We should always be able to kmalloc for a
+                        * page worth of page pointers = 4MB on i386. */
+                       if (result == -ENOMEM &&
+                           size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+                                  PAGE_CACHE_SIZE) {
+                               size = ((((size / 2) - 1) |
+                                        ~CFS_PAGE_MASK) + 1) &
+                                       CFS_PAGE_MASK;
+                               CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+                                      size);
+                               continue;
                        }
 
-                       tot_bytes += result;
-                       file_offset += result;
-                       iov_left -= result;
-                       user_addr += result;
+                       GOTO(out, result);
                }
+               iov_iter_advance(iter, result);
+               tot_bytes += result;
+               file_offset += result;
        }
 out:
        LASSERT(obj->cob_transient_pages == 0);
index 7dd2b4723c5fd6fdded98fadbcb63a68ce8c80fa..0e0b404cb5e6cc3b33dc8b736675485a617cdd55 100644 (file)
@@ -211,27 +211,26 @@ static int vvp_mmap_locks(const struct lu_env *env,
        struct cl_lock_descr   *descr = &cti->cti_descr;
        ldlm_policy_data_t      policy;
        unsigned long      addr;
-       unsigned long      seg;
        ssize_t          count;
        int                  result;
+       struct iov_iter i;
+       struct iovec iov;
 
        LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
        if (!cl_is_normalio(env, io))
                return 0;
 
-       if (vio->cui_iov == NULL) /* nfs or loop back device write */
+       if (vio->cui_iter == NULL) /* nfs or loop back device write */
                return 0;
 
        /* No MM (e.g. NFS)? No vmas too. */
        if (mm == NULL)
                return 0;
 
-       for (seg = 0; seg < vio->cui_nrsegs; seg++) {
-               const struct iovec *iv = &vio->cui_iov[seg];
-
-               addr = (unsigned long)iv->iov_base;
-               count = iv->iov_len;
+       iov_for_each(iov, i, *(vio->cui_iter)) {
+               addr = (unsigned long)iov.iov_base;
+               count = iov.iov_len;
                if (count == 0)
                        continue;
 
@@ -527,9 +526,7 @@ static int vvp_io_read_start(const struct lu_env *env,
        switch (vio->cui_io_subtype) {
        case IO_NORMAL:
                LASSERT(cio->cui_iocb->ki_pos == pos);
-               result = generic_file_aio_read(cio->cui_iocb,
-                                              cio->cui_iov, cio->cui_nrsegs,
-                                              cio->cui_iocb->ki_pos);
+               result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
                break;
        case IO_SPLICE:
                result = generic_file_splice_read(file, &pos,
@@ -595,12 +592,11 @@ static int vvp_io_write_start(const struct lu_env *env,
 
        CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-       if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */
+       if (cio->cui_iter == NULL) /* from a temp io in ll_cl_init(). */
                result = 0;
        else
-               result = generic_file_aio_write(cio->cui_iocb,
-                                               cio->cui_iov, cio->cui_nrsegs,
-                                               cio->cui_iocb->ki_pos);
+               result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+
        if (result > 0) {
                if (result < cnt)
                        io->ci_continue = 0;
@@ -1162,10 +1158,9 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
                 *  results."  -- Single Unix Spec */
                if (count == 0)
                        result = 1;
-               else {
+               else
                        cio->cui_tot_count = count;
-                       cio->cui_tot_nrsegs = 0;
-               }
+
                /* for read/write, we store the jobid in the inode, and
                 * it'll be fetched by osc when building RPC.
                 *
index ff205a7bc55c9aefcd98994aeb3c25fe4c37eb28..648f9e489b39bb3a291f091771dcd099013176db 100644 (file)
@@ -220,11 +220,11 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
         * If we can't read the file, it's no good.
         * If we can't write the file, use it read-only.
         */
-       if (!(filp->f_op->read || filp->f_op->aio_read)) {
+       if (!(filp->f_mode & FMODE_CAN_READ)) {
                LINFO(curlun, "file not readable: %s\n", filename);
                goto out;
        }
-       if (!(filp->f_op->write || filp->f_op->aio_write))
+       if (!(filp->f_mode & FMODE_CAN_WRITE))
                ro = 1;
 
        size = i_size_read(inode->i_mapping->host);
index c71e88602ff49a5836a0698a033e3042bdf95f9c..cc1cfae726b38fdc645fc7af79b4879371727b0a 100644 (file)
@@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page)
  *
  */
 static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-              loff_t pos, unsigned long nr_segs)
+v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
        /*
         * FIXME
@@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         */
        p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
                 iocb->ki_filp->f_path.dentry->d_name.name,
-                (long long)pos, nr_segs);
+                (long long)pos, iter->nr_segs);
 
        return -EINVAL;
 }
index 96e550760699a8895cbb58a4dd26a18ab1e050a3..520c11c2dcca4c9ff31a591600ca0c8ced52481c 100644 (file)
@@ -692,7 +692,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
 {
        if (filp->f_flags & O_DIRECT)
                return v9fs_direct_read(filp, data, count, offset);
-       return do_sync_read(filp, data, count, offset);
+       return new_sync_read(filp, data, count, offset);
 }
 
 /**
@@ -760,7 +760,7 @@ err_out:
 
 buff_write:
        mutex_unlock(&inode->i_mutex);
-       return do_sync_write(filp, data, count, offsetp);
+       return new_sync_write(filp, data, count, offsetp);
 }
 
 /**
@@ -778,7 +778,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
 
        if (filp->f_flags & O_DIRECT)
                return v9fs_direct_write(filp, data, count, offset);
-       return do_sync_write(filp, data, count, offset);
+       return new_sync_write(filp, data, count, offset);
 }
 
 
@@ -847,8 +847,8 @@ const struct file_operations v9fs_cached_file_operations = {
        .llseek = generic_file_llseek,
        .read = v9fs_cached_file_read,
        .write = v9fs_cached_file_write,
-       .aio_read = generic_file_aio_read,
-       .aio_write = generic_file_aio_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = generic_file_write_iter,
        .open = v9fs_file_open,
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock,
@@ -860,8 +860,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
        .llseek = generic_file_llseek,
        .read = v9fs_cached_file_read,
        .write = v9fs_cached_file_write,
-       .aio_read = generic_file_aio_read,
-       .aio_write = generic_file_aio_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = generic_file_write_iter,
        .open = v9fs_file_open,
        .release = v9fs_dir_release,
        .lock = v9fs_file_lock_dotl,
index a36da5382b40dc9c09ab6aa59762d9c1a7808b72..07c9edce5aa768ddeb7ae203c8c9cab489ea8417 100644 (file)
 
 const struct file_operations adfs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
        .mmap           = generic_file_mmap,
        .fsync          = generic_file_fsync,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .splice_read    = generic_file_splice_read,
 };
 
index 0270303388ee669515c8829f7370da24db6d16e2..a7fe57d2cd9a0aa6a59df2cd90778127ebc2bbc3 100644 (file)
@@ -27,10 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp);
 
 const struct file_operations affs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .open           = affs_file_open,
        .release        = affs_file_release,
index 66d50fe2ee459a887511381e8e375db72d2bf1f3..932ce07948b387d7aa75dffb3a9de1e21f1f50b8 100644 (file)
@@ -31,10 +31,10 @@ const struct file_operations afs_file_operations = {
        .open           = afs_open,
        .release        = afs_release,
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = afs_file_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = afs_file_write,
        .mmap           = generic_file_readonly_mmap,
        .splice_read    = generic_file_splice_read,
        .fsync          = afs_fsync,
index 590b55f46d61dd1ca169ff78a3dbf4f5d32b813c..71d5982312f3d11dd6e3dd23079e5c6bef7c23a6 100644 (file)
@@ -747,8 +747,7 @@ extern int afs_write_end(struct file *file, struct address_space *mapping,
 extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
-extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
-                             unsigned long, loff_t);
+extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
 extern int afs_writeback_all(struct afs_vnode *);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 
index a890db4b9898fc1d888c5e7285da55db85e4da54..ab6adfd525168800524349791c061512a47a7d81 100644 (file)
@@ -625,15 +625,14 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 /*
  * write to an AFS file
  */
-ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
-                      unsigned long nr_segs, loff_t pos)
+ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
        ssize_t result;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(from);
 
-       _enter("{%x.%u},{%zu},%lu,",
-              vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
+       _enter("{%x.%u},{%zu},",
+              vnode->fid.vid, vnode->fid.vnode, count);
 
        if (IS_SWAPFILE(&vnode->vfs_inode)) {
                printk(KERN_INFO
@@ -644,7 +643,7 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
        if (!count)
                return 0;
 
-       result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       result = generic_file_write_iter(iocb, from);
        if (IS_ERR_VALUE(result)) {
                _leave(" = %zd", result);
                return result;
index a0ed6c7d2cd2a3e91a5d12e48af705d75afe315a..56b28607c32d14704f5aa37066e57b12e4fff79b 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1241,6 +1241,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
                            unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
                                     int rw, char __user *buf,
@@ -1298,7 +1299,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
        int rw;
        fmode_t mode;
        aio_rw_op *rw_op;
+       rw_iter_op *iter_op;
        struct iovec inline_vec, *iovec = &inline_vec;
+       struct iov_iter iter;
 
        switch (opcode) {
        case IOCB_CMD_PREAD:
@@ -1306,6 +1309,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
                mode    = FMODE_READ;
                rw      = READ;
                rw_op   = file->f_op->aio_read;
+               iter_op = file->f_op->read_iter;
                goto rw_common;
 
        case IOCB_CMD_PWRITE:
@@ -1313,12 +1317,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
                mode    = FMODE_WRITE;
                rw      = WRITE;
                rw_op   = file->f_op->aio_write;
+               iter_op = file->f_op->write_iter;
                goto rw_common;
 rw_common:
                if (unlikely(!(file->f_mode & mode)))
                        return -EBADF;
 
-               if (!rw_op)
+               if (!rw_op && !iter_op)
                        return -EINVAL;
 
                ret = (opcode == IOCB_CMD_PREADV ||
@@ -1347,7 +1352,12 @@ rw_common:
                if (rw == WRITE)
                        file_start_write(file);
 
-               ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+               if (iter_op) {
+                       iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+                       ret = iter_op(req, &iter);
+               } else {
+                       ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+               }
 
                if (rw == WRITE)
                        file_end_write(file);
index ae28922183357d4c0e4d491a452919e125ba8bc3..e7f88ace1a2508d260ea8feae3addc5e2752980c 100644 (file)
 
 const struct file_operations bfs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
 };
index 83fba15cc394071a53b57245b95942c18177d9c7..6d7274619bf916c2dcf0d7744ba8d888d948d711 100644 (file)
@@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 }
 
 static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+                       loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
-       return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-                                   nr_segs, blkdev_get_block, NULL, NULL, 0);
+       return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+                                   offset, blkdev_get_block,
+                                   NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1571,43 +1572,38 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
  * Does not take i_mutex for the write and thus is not for general purpose
  * use.
  */
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                        unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct blk_plug plug;
        ssize_t ret;
 
-       BUG_ON(iocb->ki_pos != pos);
-
        blk_start_plug(&plug);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs);
+       ret = __generic_file_write_iter(iocb, from);
        if (ret > 0) {
                ssize_t err;
-
-               err = generic_write_sync(file, pos, ret);
+               err = generic_write_sync(file, iocb->ki_pos - ret, ret);
                if (err < 0)
                        ret = err;
        }
        blk_finish_plug(&plug);
        return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
-static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                        unsigned long nr_segs, loff_t pos)
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *file = iocb->ki_filp;
        struct inode *bd_inode = file->f_mapping->host;
        loff_t size = i_size_read(bd_inode);
+       loff_t pos = iocb->ki_pos;
 
        if (pos >= size)
                return 0;
 
        size -= pos;
-       if (size < iocb->ki_nbytes)
-               nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
-       return generic_file_aio_read(iocb, iov, nr_segs, pos);
+       iov_iter_truncate(to, size);
+       return generic_file_read_iter(iocb, to);
 }
 
 /*
@@ -1639,10 +1635,10 @@ const struct file_operations def_blk_fops = {
        .open           = blkdev_open,
        .release        = blkdev_close,
        .llseek         = block_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = blkdev_aio_read,
-       .aio_write      = blkdev_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = blkdev_read_iter,
+       .write_iter     = blkdev_write_iter,
        .mmap           = generic_file_mmap,
        .fsync          = blkdev_fsync,
        .unlocked_ioctl = block_ioctl,
@@ -1650,7 +1646,7 @@ const struct file_operations def_blk_fops = {
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
index e472441feb5de80ce68c85e6f892531893706a13..1f2b99cb55eaef682c51ae3c8b227e88aafbf7e7 100644 (file)
@@ -448,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                write_bytes -= copied;
                total_copied += copied;
 
-               /* Return to btrfs_file_aio_write to fault page */
+               /* Return to btrfs_file_write_iter to fault page */
                if (unlikely(copied == 0))
                        break;
 
@@ -1675,27 +1675,22 @@ again:
 }
 
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
-                                   const struct iovec *iov,
-                                   unsigned long nr_segs, loff_t pos,
-                                   size_t count, size_t ocount)
+                                   struct iov_iter *from,
+                                   loff_t pos)
 {
        struct file *file = iocb->ki_filp;
-       struct iov_iter i;
        ssize_t written;
        ssize_t written_buffered;
        loff_t endbyte;
        int err;
 
-       written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-                                           count, ocount);
+       written = generic_file_direct_write(iocb, from, pos);
 
-       if (written < 0 || written == count)
+       if (written < 0 || !iov_iter_count(from))
                return written;
 
        pos += written;
-       count -= written;
-       iov_iter_init(&i, iov, nr_segs, count, written);
-       written_buffered = __btrfs_buffered_write(file, &i, pos);
+       written_buffered = __btrfs_buffered_write(file, from, pos);
        if (written_buffered < 0) {
                err = written_buffered;
                goto out;
@@ -1730,9 +1725,8 @@ static void update_time_for_write(struct inode *inode)
                inode_inc_iversion(inode);
 }
 
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
-                                   const struct iovec *iov,
-                                   unsigned long nr_segs, loff_t pos)
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
+                                   struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -1741,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        u64 end_pos;
        ssize_t num_written = 0;
        ssize_t err = 0;
-       size_t count, ocount;
+       size_t count = iov_iter_count(from);
        bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
+       loff_t pos = iocb->ki_pos;
 
        mutex_lock(&inode->i_mutex);
 
-       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-       if (err) {
-               mutex_unlock(&inode->i_mutex);
-               goto out;
-       }
-       count = ocount;
-
        current->backing_dev_info = inode->i_mapping->backing_dev_info;
        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
        if (err) {
@@ -1765,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                goto out;
        }
 
+       iov_iter_truncate(from, count);
+
        err = file_remove_suid(file);
        if (err) {
                mutex_unlock(&inode->i_mutex);
@@ -1806,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                atomic_inc(&BTRFS_I(inode)->sync_writers);
 
        if (unlikely(file->f_flags & O_DIRECT)) {
-               num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-                                                  pos, count, ocount);
+               num_written = __btrfs_direct_write(iocb, from, pos);
        } else {
-               struct iov_iter i;
-
-               iov_iter_init(&i, iov, nr_segs, count, num_written);
-
-               num_written = __btrfs_buffered_write(file, &i, pos);
+               num_written = __btrfs_buffered_write(file, from, pos);
                if (num_written > 0)
                        iocb->ki_pos = pos + num_written;
        }
@@ -2740,11 +2725,11 @@ out:
 
 const struct file_operations btrfs_file_operations = {
        .llseek         = btrfs_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
        .splice_read    = generic_file_splice_read,
-       .aio_write      = btrfs_file_aio_write,
+       .write_iter     = btrfs_file_write_iter,
        .mmap           = btrfs_file_mmap,
        .open           = generic_file_open,
        .release        = btrfs_release_file,
index 7fa5f7fd7bc79259ed5a5e51131cbf6c8d07d919..8925f66a14115c9d733182f2ec4d113be5be5edd 100644 (file)
@@ -7445,39 +7445,30 @@ free_ordered:
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
-                       const struct iovec *iov, loff_t offset,
-                       unsigned long nr_segs)
+                       const struct iov_iter *iter, loff_t offset)
 {
        int seg;
        int i;
-       size_t size;
-       unsigned long addr;
        unsigned blocksize_mask = root->sectorsize - 1;
        ssize_t retval = -EINVAL;
-       loff_t end = offset;
 
        if (offset & blocksize_mask)
                goto out;
 
-       /* Check the memory alignment.  Blocks cannot straddle pages */
-       for (seg = 0; seg < nr_segs; seg++) {
-               addr = (unsigned long)iov[seg].iov_base;
-               size = iov[seg].iov_len;
-               end += size;
-               if ((addr & blocksize_mask) || (size & blocksize_mask))
-                       goto out;
-
-               /* If this is a write we don't need to check anymore */
-               if (rw & WRITE)
-                       continue;
+       if (iov_iter_alignment(iter) & blocksize_mask)
+               goto out;
 
-               /*
-                * Check to make sure we don't have duplicate iov_base's in this
-                * iovec, if so return EINVAL, otherwise we'll get csum errors
-                * when reading back.
-                */
-               for (i = seg + 1; i < nr_segs; i++) {
-                       if (iov[seg].iov_base == iov[i].iov_base)
+       /* If this is a write we don't need to check anymore */
+       if (rw & WRITE)
+               return 0;
+       /*
+        * Check to make sure we don't have duplicate iov_base's in this
+        * iovec, if so return EINVAL, otherwise we'll get csum errors
+        * when reading back.
+        */
+       for (seg = 0; seg < iter->nr_segs; seg++) {
+               for (i = seg + 1; i < iter->nr_segs; i++) {
+                       if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
                                goto out;
                }
        }
@@ -7487,8 +7478,7 @@ out:
 }
 
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
-                       const struct iovec *iov, loff_t offset,
-                       unsigned long nr_segs)
+                       struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
@@ -7498,8 +7488,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
        bool relock = false;
        ssize_t ret;
 
-       if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                           offset, nr_segs))
+       if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
                return 0;
 
        atomic_inc(&inode->i_dio_count);
@@ -7511,7 +7500,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
         * we need to flush the dirty pages again to make absolutely sure
         * that any outstanding dirty pages are on disk.
         */
-       count = iov_length(iov, nr_segs);
+       count = iov_iter_count(iter);
        if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
                     &BTRFS_I(inode)->runtime_flags))
                filemap_fdatawrite_range(inode->i_mapping, offset, count);
@@ -7538,7 +7527,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 
        ret = __blockdev_direct_IO(rw, iocb, inode,
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-                       iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+                       iter, offset, btrfs_get_blocks_direct, NULL,
                        btrfs_submit_direct, flags);
        if (rw & WRITE) {
                if (ret < 0 && ret != -EIOCBQUEUED)
index 65a30e817dd80ab9c7264ade89b9cae563998465..4f3f69079f362280379edf3b13c4766247c764fa 100644 (file)
@@ -1187,8 +1187,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
  * never get called.
  */
 static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-                             const struct iovec *iov,
-                             loff_t pos, unsigned long nr_segs)
+                             struct iov_iter *iter,
+                             loff_t pos)
 {
        WARN_ON(1);
        return -EINVAL;
index 88a6df4cbe6d8a52bd083a756ac452b798c33708..302085100c28af1a2ed67269e955b3d0839539be 100644 (file)
@@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
        struct page **pages;
        u64 off = iocb->ki_pos;
        int num_pages, ret;
-       size_t len = i->count;
+       size_t len = iov_iter_count(i);
 
        dout("sync_read on file %p %llu~%u %s\n", file, off,
             (unsigned)len,
@@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 
        if (file->f_flags & O_DIRECT) {
                while (iov_iter_count(i)) {
-                       void __user *data = i->iov[0].iov_base + i->iov_offset;
-                       size_t len = i->iov[0].iov_len - i->iov_offset;
+                       size_t start;
+                       ssize_t n;
 
-                       num_pages = calc_pages_for((unsigned long)data, len);
-                       pages = ceph_get_direct_page_vector(data,
-                                                           num_pages, true);
-                       if (IS_ERR(pages))
-                               return PTR_ERR(pages);
+                       n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
+                       if (n < 0)
+                               return n;
 
-                       ret = striped_read(inode, off, len,
+                       num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+                       ret = striped_read(inode, off, n,
                                           pages, num_pages, checkeof,
-                                          1, (unsigned long)data & ~PAGE_MASK);
+                                          1, start);
+
                        ceph_put_page_vector(pages, num_pages, true);
 
                        if (ret <= 0)
                                break;
                        off += ret;
                        iov_iter_advance(i, ret);
-                       if (ret < len)
+                       if (ret < n)
                                break;
                }
        } else {
@@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
                                        num_pages, checkeof, 0, 0);
                if (ret > 0) {
                        int l, k = 0;
-                       size_t left = len = ret;
+                       size_t left = ret;
 
                        while (left) {
-                               void __user *data = i->iov[0].iov_base
-                                                       + i->iov_offset;
-                               l = min(i->iov[0].iov_len - i->iov_offset,
-                                       left);
-
-                               ret = ceph_copy_page_vector_to_user(&pages[k],
-                                                                   data, off,
-                                                                   l);
-                               if (ret > 0) {
-                                       iov_iter_advance(i, ret);
-                                       left -= ret;
-                                       off += ret;
-                                       k = calc_pages_for(iocb->ki_pos,
-                                                          len - left + 1) - 1;
-                                       BUG_ON(k >= num_pages && left);
-                               } else
+                               int copy = min_t(size_t, PAGE_SIZE, left);
+                               l = copy_page_to_iter(pages[k++], 0, copy, i);
+                               off += l;
+                               left -= l;
+                               if (l < copy)
                                        break;
                        }
                }
@@ -541,8 +531,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
  * objects, rollback on failure, etc.)
  */
 static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
-                      unsigned long nr_segs, size_t count)
+ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -556,11 +545,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
        int written = 0;
        int flags;
        int check_caps = 0;
-       int page_align;
        int ret;
        struct timespec mtime = CURRENT_TIME;
        loff_t pos = iocb->ki_pos;
-       struct iov_iter i;
+       size_t count = iov_iter_count(from);
 
        if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
@@ -582,13 +570,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
                CEPH_OSD_FLAG_ONDISK |
                CEPH_OSD_FLAG_WRITE;
 
-       iov_iter_init(&i, iov, nr_segs, count, 0);
-
-       while (iov_iter_count(&i) > 0) {
-               void __user *data = i.iov->iov_base + i.iov_offset;
-               u64 len = i.iov->iov_len - i.iov_offset;
-
-               page_align = (unsigned long)data & ~PAGE_MASK;
+       while (iov_iter_count(from) > 0) {
+               u64 len = iov_iter_single_seg_count(from);
+               size_t start;
+               ssize_t n;
 
                snapc = ci->i_snap_realm->cached_context;
                vino = ceph_vino(inode);
@@ -604,20 +589,21 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
                        break;
                }
 
-               num_pages = calc_pages_for(page_align, len);
-               pages = ceph_get_direct_page_vector(data, num_pages, false);
-               if (IS_ERR(pages)) {
-                       ret = PTR_ERR(pages);
-                       goto out;
+               n = iov_iter_get_pages_alloc(from, &pages, len, &start);
+               if (unlikely(n < 0)) {
+                       ret = n;
+                       ceph_osdc_put_request(req);
+                       break;
                }
 
+               num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
                /*
                 * throw out any page cache pages in this range. this
                 * may block.
                 */
                truncate_inode_pages_range(inode->i_mapping, pos,
-                                  (pos+len) | (PAGE_CACHE_SIZE-1));
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+                                  (pos+n) | (PAGE_CACHE_SIZE-1));
+               osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
                                                false, false);
 
                /* BUG_ON(vino.snap != CEPH_NOSNAP); */
@@ -629,22 +615,20 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
                ceph_put_page_vector(pages, num_pages, false);
 
-out:
                ceph_osdc_put_request(req);
-               if (ret == 0) {
-                       pos += len;
-                       written += len;
-                       iov_iter_advance(&i, (size_t)len);
-
-                       if (pos > i_size_read(inode)) {
-                               check_caps = ceph_inode_set_size(inode, pos);
-                               if (check_caps)
-                                       ceph_check_caps(ceph_inode(inode),
-                                                       CHECK_CAPS_AUTHONLY,
-                                                       NULL);
-                       }
-               } else
+               if (ret)
                        break;
+               pos += n;
+               written += n;
+               iov_iter_advance(from, n);
+
+               if (pos > i_size_read(inode)) {
+                       check_caps = ceph_inode_set_size(inode, pos);
+                       if (check_caps)
+                               ceph_check_caps(ceph_inode(inode),
+                                               CHECK_CAPS_AUTHONLY,
+                                               NULL);
+               }
        }
 
        if (ret != -EOLDSNAPC && written > 0) {
@@ -662,8 +646,7 @@ out:
  * correct atomic write, we should e.g. take write locks on all
  * objects, rollback on failure, etc.)
  */
-static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, size_t count)
+static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -681,7 +664,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
        int ret;
        struct timespec mtime = CURRENT_TIME;
        loff_t pos = iocb->ki_pos;
-       struct iov_iter i;
+       size_t count = iov_iter_count(from);
 
        if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
@@ -703,9 +686,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
                CEPH_OSD_FLAG_WRITE |
                CEPH_OSD_FLAG_ACK;
 
-       iov_iter_init(&i, iov, nr_segs, count, 0);
-
-       while ((len = iov_iter_count(&i)) > 0) {
+       while ((len = iov_iter_count(from)) > 0) {
                size_t left;
                int n;
 
@@ -737,13 +718,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
                left = len;
                for (n = 0; n < num_pages; n++) {
                        size_t plen = min_t(size_t, left, PAGE_SIZE);
-                       ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+                       ret = copy_page_from_iter(pages[n], 0, plen, from);
                        if (ret != plen) {
                                ret = -EFAULT;
                                break;
                        }
                        left -= ret;
-                       iov_iter_advance(&i, ret);
                }
 
                if (ret < 0) {
@@ -796,8 +776,7 @@ out:
  *
  * Hmm, the sync read case isn't actually async... should it be?
  */
-static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                            unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *filp = iocb->ki_filp;
        struct ceph_file_info *fi = filp->private_data;
@@ -823,40 +802,20 @@ again:
        if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_filp->f_flags & O_DIRECT) ||
            (fi->flags & CEPH_F_SYNC)) {
-               struct iov_iter i;
 
                dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
                     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
                     ceph_cap_string(got));
 
-               if (!read) {
-                       ret = generic_segment_checks(iov, &nr_segs,
-                                                       &len, VERIFY_WRITE);
-                       if (ret)
-                               goto out;
-               }
-
-               iov_iter_init(&i, iov, nr_segs, len, read);
-
                /* hmm, this isn't really async... */
-               ret = ceph_sync_read(iocb, &i, &checkeof);
+               ret = ceph_sync_read(iocb, to, &checkeof);
        } else {
-               /*
-                * We can't modify the content of iov,
-                * so we only read from beginning.
-                */
-               if (read) {
-                       iocb->ki_pos = pos;
-                       len = iocb->ki_nbytes;
-                       read = 0;
-               }
                dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
-                    inode, ceph_vinop(inode), pos, (unsigned)len,
+                    inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
                     ceph_cap_string(got));
 
-               ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+               ret = generic_file_read_iter(iocb, to);
        }
-out:
        dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
             inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
        ceph_put_cap_refs(ci, got);
@@ -872,6 +831,7 @@ out:
                             ", reading more\n", iocb->ki_pos,
                             inode->i_size);
 
+                       iov_iter_advance(to, ret);
                        read += ret;
                        len -= ret;
                        checkeof = 0;
@@ -895,8 +855,7 @@ out:
  *
  * If we are near ENOSPC, write synchronously.
  */
-static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                      unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct ceph_file_info *fi = file->private_data;
@@ -904,18 +863,15 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_osd_client *osdc =
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
-       ssize_t count, written = 0;
+       ssize_t count = iov_iter_count(from), written = 0;
        int err, want, got;
+       loff_t pos = iocb->ki_pos;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
        mutex_lock(&inode->i_mutex);
 
-       err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-       if (err)
-               goto out;
-
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = file->f_mapping->backing_dev_info;
 
@@ -925,6 +881,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        if (count == 0)
                goto out;
+       iov_iter_truncate(from, count);
 
        err = file_remove_suid(file);
        if (err)
@@ -956,23 +913,26 @@ retry_snap:
 
        if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+               struct iov_iter data;
                mutex_unlock(&inode->i_mutex);
+               /* we might need to revert back to that point */
+               data = *from;
                if (file->f_flags & O_DIRECT)
-                       written = ceph_sync_direct_write(iocb, iov,
-                                                        nr_segs, count);
+                       written = ceph_sync_direct_write(iocb, &data);
                else
-                       written = ceph_sync_write(iocb, iov, nr_segs, count);
+                       written = ceph_sync_write(iocb, &data);
                if (written == -EOLDSNAPC) {
                        dout("aio_write %p %llx.%llx %llu~%u"
                                "got EOLDSNAPC, retrying\n",
                                inode, ceph_vinop(inode),
-                               pos, (unsigned)iov->iov_len);
+                               pos, (unsigned)count);
                        mutex_lock(&inode->i_mutex);
                        goto retry_snap;
                }
+               if (written > 0)
+                       iov_iter_advance(from, written);
        } else {
                loff_t old_size = inode->i_size;
-               struct iov_iter from;
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -980,8 +940,7 @@ retry_snap:
                 * are pending vmtruncate. So write and vmtruncate
                 * can not run at the same time
                 */
-               iov_iter_init(&from, iov, nr_segs, count, 0);
-               written = generic_perform_write(file, &from, pos);
+               written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
                if (inode->i_size > old_size)
@@ -999,7 +958,7 @@ retry_snap:
        }
 
        dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-            inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+            inode, ceph_vinop(inode), pos, (unsigned)count,
             ceph_cap_string(got));
        ceph_put_cap_refs(ci, got);
 
@@ -1276,16 +1235,16 @@ const struct file_operations ceph_file_fops = {
        .open = ceph_open,
        .release = ceph_release,
        .llseek = ceph_llseek,
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = ceph_aio_read,
-       .aio_write = ceph_aio_write,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = ceph_read_iter,
+       .write_iter = ceph_write_iter,
        .mmap = ceph_mmap,
        .fsync = ceph_fsync,
        .lock = ceph_lock,
        .flock = ceph_flock,
        .splice_read = generic_file_splice_read,
-       .splice_write = generic_file_splice_write,
+       .splice_write = iter_file_splice_write,
        .unlocked_ioctl = ceph_ioctl,
        .compat_ioctl   = ceph_ioctl,
        .fallocate      = ceph_fallocate,
index 6aaa8112c538a73c82b15eaf8dd733abd21f39f0..2c90d07c0b3aa3a6db836e0290fd0ecc2137b317 100644 (file)
@@ -725,8 +725,7 @@ out_nls:
        goto out;
 }
 
-static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                  unsigned long nr_segs, loff_t pos)
+static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -737,14 +736,14 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (written)
                return written;
 
-       written = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       written = generic_file_write_iter(iocb, from);
 
        if (CIFS_CACHE_WRITE(CIFS_I(inode)))
                goto out;
 
        rc = filemap_fdatawrite(inode->i_mapping);
        if (rc)
-               cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
+               cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n",
                         rc, inode);
 
 out:
@@ -880,10 +879,10 @@ const struct inode_operations cifs_symlink_inode_ops = {
 };
 
 const struct file_operations cifs_file_ops = {
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = generic_file_aio_read,
-       .aio_write = cifs_file_aio_write,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = cifs_file_write_iter,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -899,10 +898,10 @@ const struct file_operations cifs_file_ops = {
 };
 
 const struct file_operations cifs_file_strict_ops = {
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = cifs_strict_readv,
-       .aio_write = cifs_strict_writev,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = cifs_strict_readv,
+       .write_iter = cifs_strict_writev,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -919,10 +918,10 @@ const struct file_operations cifs_file_strict_ops = {
 
 const struct file_operations cifs_file_direct_ops = {
        /* BB reevaluate whether they can be done with directio, no cache */
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = cifs_user_readv,
-       .aio_write = cifs_user_writev,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = cifs_user_readv,
+       .write_iter = cifs_user_writev,
        .open = cifs_open,
        .release = cifs_close,
        .lock = cifs_lock,
@@ -938,10 +937,10 @@ const struct file_operations cifs_file_direct_ops = {
 };
 
 const struct file_operations cifs_file_nobrl_ops = {
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = generic_file_aio_read,
-       .aio_write = cifs_file_aio_write,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = cifs_file_write_iter,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_fsync,
@@ -956,10 +955,10 @@ const struct file_operations cifs_file_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_strict_nobrl_ops = {
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = cifs_strict_readv,
-       .aio_write = cifs_strict_writev,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = cifs_strict_readv,
+       .write_iter = cifs_strict_writev,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_strict_fsync,
@@ -975,10 +974,10 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
        /* BB reevaluate whether they can be done with directio, no cache */
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = cifs_user_readv,
-       .aio_write = cifs_user_writev,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = cifs_user_readv,
+       .write_iter = cifs_user_writev,
        .open = cifs_open,
        .release = cifs_close,
        .fsync = cifs_fsync,
index 8fe51166d6e3192bb8aadf2d86f5a0acf494622a..70f178a7c759525a17fc758e0637bf1d0c941ead 100644 (file)
@@ -95,14 +95,10 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
-                                unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
-                                 unsigned long nr_segs, loff_t pos);
+extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
 extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
index 208f56eca4bf4de164d8af873b0050ac4884c5ea..e90a1e9aa627642c9ccefd428319f43b3d379c2f 100644 (file)
@@ -2385,14 +2385,12 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
 }
 
 static ssize_t
-cifs_iovec_write(struct file *file, const struct iovec *iov,
-                unsigned long nr_segs, loff_t *poffset)
+cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
 {
        unsigned long nr_pages, i;
        size_t bytes, copied, len, cur_len;
        ssize_t total_written = 0;
        loff_t offset;
-       struct iov_iter it;
        struct cifsFileInfo *open_file;
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
@@ -2401,14 +2399,16 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
        int rc;
        pid_t pid;
 
-       len = iov_length(iov, nr_segs);
-       if (!len)
-               return 0;
-
+       len = iov_iter_count(from);
        rc = generic_write_checks(file, poffset, &len, 0);
        if (rc)
                return rc;
 
+       if (!len)
+               return 0;
+
+       iov_iter_truncate(from, len);
+
        INIT_LIST_HEAD(&wdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
@@ -2424,7 +2424,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
        else
                pid = current->tgid;
 
-       iov_iter_init(&it, iov, nr_segs, len, 0);
        do {
                size_t save_len;
 
@@ -2444,11 +2443,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 
                save_len = cur_len;
                for (i = 0; i < nr_pages; i++) {
-                       bytes = min_t(const size_t, cur_len, PAGE_SIZE);
-                       copied = iov_iter_copy_from_user(wdata->pages[i], &it,
-                                                        0, bytes);
+                       bytes = min_t(size_t, cur_len, PAGE_SIZE);
+                       copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
+                                                    from);
                        cur_len -= copied;
-                       iov_iter_advance(&it, copied);
                        /*
                         * If we didn't copy as much as we expected, then that
                         * may mean we trod into an unmapped area. Stop copying
@@ -2546,11 +2544,11 @@ restart_loop:
        return total_written ? total_written : (ssize_t)rc;
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos)
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 {
        ssize_t written;
        struct inode *inode;
+       loff_t pos = iocb->ki_pos;
 
        inode = file_inode(iocb->ki_filp);
 
@@ -2560,7 +2558,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
         * write request.
         */
 
-       written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
+       written = cifs_iovec_write(iocb->ki_filp, from, &pos);
        if (written > 0) {
                set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
                iocb->ki_pos = pos;
@@ -2570,8 +2568,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static ssize_t
-cifs_writev(struct kiocb *iocb, const struct iovec *iov,
-           unsigned long nr_segs, loff_t pos)
+cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2589,10 +2586,10 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
        mutex_lock(&inode->i_mutex);
        if (file->f_flags & O_APPEND)
                lock_pos = i_size_read(inode);
-       if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
+       if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
                                     server->vals->exclusive_lock_type, NULL,
                                     CIFS_WRITE_OP)) {
-               rc = __generic_file_aio_write(iocb, iov, nr_segs);
+               rc = __generic_file_write_iter(iocb, from);
                mutex_unlock(&inode->i_mutex);
 
                if (rc > 0) {
@@ -2610,8 +2607,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 }
 
 ssize_t
-cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
-                  unsigned long nr_segs, loff_t pos)
+cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2629,11 +2625,10 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                if (cap_unix(tcon->ses) &&
                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
-                       written = generic_file_aio_write(
-                                       iocb, iov, nr_segs, pos);
+                       written = generic_file_write_iter(iocb, from);
                        goto out;
                }
-               written = cifs_writev(iocb, iov, nr_segs, pos);
+               written = cifs_writev(iocb, from);
                goto out;
        }
        /*
@@ -2642,7 +2637,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
         * affected pages because it may cause a error with mandatory locks on
         * these pages but not on the region from pos to ppos+len-1.
         */
-       written = cifs_user_writev(iocb, iov, nr_segs, pos);
+       written = cifs_user_writev(iocb, from);
        if (written > 0 && CIFS_CACHE_READ(cinode)) {
                /*
                 * Windows 7 server can delay breaking level2 oplock if a write
@@ -2831,32 +2826,25 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
        return total_read > 0 ? total_read : result;
 }
 
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, loff_t pos)
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *file = iocb->ki_filp;
        ssize_t rc;
        size_t len, cur_len;
        ssize_t total_read = 0;
-       loff_t offset = pos;
+       loff_t offset = iocb->ki_pos;
        unsigned int npages;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
        struct cifs_readdata *rdata, *tmp;
        struct list_head rdata_list;
-       struct iov_iter to;
        pid_t pid;
 
-       if (!nr_segs)
-               return 0;
-
-       len = iov_length(iov, nr_segs);
+       len = iov_iter_count(to);
        if (!len)
                return 0;
 
-       iov_iter_init(&to, iov, nr_segs, len, 0);
-
        INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
@@ -2914,7 +2902,7 @@ error:
        if (!list_empty(&rdata_list))
                rc = 0;
 
-       len = iov_iter_count(&to);
+       len = iov_iter_count(to);
        /* the loop below should proceed in the order of increasing offsets */
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
        again:
@@ -2931,7 +2919,7 @@ error:
                                        goto again;
                                }
                        } else {
-                               rc = cifs_readdata_to_iov(rdata, &to);
+                               rc = cifs_readdata_to_iov(rdata, to);
                        }
 
                }
@@ -2939,7 +2927,7 @@ error:
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
 
-       total_read = len - iov_iter_count(&to);
+       total_read = len - iov_iter_count(to);
 
        cifs_stats_bytes_read(tcon, total_read);
 
@@ -2948,15 +2936,14 @@ error:
                rc = 0;
 
        if (total_read) {
-               iocb->ki_pos = pos + total_read;
+               iocb->ki_pos += total_read;
                return total_read;
        }
        return rc;
 }
 
 ssize_t
-cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
-                 unsigned long nr_segs, loff_t pos)
+cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
@@ -2975,22 +2962,22 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
         * pos+len-1.
         */
        if (!CIFS_CACHE_READ(cinode))
-               return cifs_user_readv(iocb, iov, nr_segs, pos);
+               return cifs_user_readv(iocb, to);
 
        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
-               return generic_file_aio_read(iocb, iov, nr_segs, pos);
+               return generic_file_read_iter(iocb, to);
 
        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents reading.
         */
        down_read(&cinode->lock_sem);
-       if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
+       if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
                                     tcon->ses->server->vals->shared_lock_type,
                                     NULL, CIFS_READ_OP))
-               rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+               rc = generic_file_read_iter(iocb, to);
        up_read(&cinode->lock_sem);
        return rc;
 }
@@ -3703,8 +3690,8 @@ void cifs_oplock_break(struct work_struct *work)
  * Direct IO is not yet supported in the cached mode. 
  */
 static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t pos, unsigned long nr_segs)
+cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t pos)
 {
         /*
          * FIXME
index 1792d6075b4f80ced75e04d17c02b8d362b51aac..06f65857a855725247c1190d243c0e19cccd8570 100644 (file)
@@ -532,10 +532,12 @@ static inline struct dentry *lock_parent(struct dentry *dentry)
        struct dentry *parent = dentry->d_parent;
        if (IS_ROOT(dentry))
                return NULL;
+       if (unlikely((int)dentry->d_lockref.count < 0))
+               return NULL;
        if (likely(spin_trylock(&parent->d_lock)))
                return parent;
-       spin_unlock(&dentry->d_lock);
        rcu_read_lock();
+       spin_unlock(&dentry->d_lock);
 again:
        parent = ACCESS_ONCE(dentry->d_parent);
        spin_lock(&parent->d_lock);
index 31ba0935e32ed2f271253a1d828778a91193b211..98040ba388ac1e2db62f96f253bc141758013b10 100644 (file)
@@ -77,7 +77,6 @@ struct dio_submit {
        unsigned blocks_available;      /* At block_in_file.  changes */
        int reap_counter;               /* rate limit reaping */
        sector_t final_block_in_request;/* doesn't change */
-       unsigned first_block_in_page;   /* doesn't change, Used only once */
        int boundary;                   /* prev block is at a boundary */
        get_block_t *get_block;         /* block mapping function */
        dio_submit_t *submit_io;        /* IO submition function */
@@ -98,19 +97,14 @@ struct dio_submit {
        sector_t cur_page_block;        /* Where it starts */
        loff_t cur_page_fs_offset;      /* Offset in file */
 
-       /*
-        * Page fetching state. These variables belong to dio_refill_pages().
-        */
-       int curr_page;                  /* changes */
-       int total_pages;                /* doesn't change */
-       unsigned long curr_user_address;/* changes */
-
+       struct iov_iter *iter;
        /*
         * Page queue.  These variables belong to dio_refill_pages() and
         * dio_get_page().
         */
        unsigned head;                  /* next page to process */
        unsigned tail;                  /* last valid page + 1 */
+       size_t from, to;
 };
 
 /* dio_state communicated between submission path and end_io */
@@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
  */
 static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
-       int ret;
-       int nr_pages;
+       ssize_t ret;
 
-       nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
-       ret = get_user_pages_fast(
-               sdio->curr_user_address,                /* Where from? */
-               nr_pages,                       /* How many pages? */
-               dio->rw == READ,                /* Write to memory? */
-               &dio->pages[0]);                /* Put results here */
+       ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+                               &sdio->from);
 
        if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
                struct page *page = ZERO_PAGE(0);
@@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
                dio->pages[0] = page;
                sdio->head = 0;
                sdio->tail = 1;
-               ret = 0;
-               goto out;
+               sdio->from = 0;
+               sdio->to = PAGE_SIZE;
+               return 0;
        }
 
        if (ret >= 0) {
-               sdio->curr_user_address += ret * PAGE_SIZE;
-               sdio->curr_page += ret;
+               iov_iter_advance(sdio->iter, ret);
+               ret += sdio->from;
                sdio->head = 0;
-               sdio->tail = ret;
-               ret = 0;
+               sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+               sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
+               return 0;
        }
-out:
        return ret;     
 }
 
@@ -208,8 +198,9 @@ out:
  * L1 cache.
  */
 static inline struct page *dio_get_page(struct dio *dio,
-               struct dio_submit *sdio)
+               struct dio_submit *sdio, size_t *from, size_t *to)
 {
+       int n;
        if (dio_pages_present(sdio) == 0) {
                int ret;
 
@@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio,
                        return ERR_PTR(ret);
                BUG_ON(dio_pages_present(sdio) == 0);
        }
-       return dio->pages[sdio->head++];
+       n = sdio->head++;
+       *from = n ? 0 : sdio->from;
+       *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+       return dio->pages[n];
 }
 
 /**
@@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-       while (dio_pages_present(sdio))
-               page_cache_release(dio_get_page(dio, sdio));
+       while (sdio->head < sdio->tail)
+               page_cache_release(dio->pages[sdio->head++]);
 }
 
 /*
@@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
                        struct buffer_head *map_bh)
 {
        const unsigned blkbits = sdio->blkbits;
-       const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
-       struct page *page;
-       unsigned block_in_page;
        int ret = 0;
 
-       /* The I/O can start at any block offset within the first page */
-       block_in_page = sdio->first_block_in_page;
-
        while (sdio->block_in_file < sdio->final_block_in_request) {
-               page = dio_get_page(dio, sdio);
+               struct page *page;
+               size_t from, to;
+               page = dio_get_page(dio, sdio, &from, &to);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
                        goto out;
                }
 
-               while (block_in_page < blocks_per_page) {
-                       unsigned offset_in_page = block_in_page << blkbits;
+               while (from < to) {
                        unsigned this_chunk_bytes;      /* # of bytes mapped */
                        unsigned this_chunk_blocks;     /* # of blocks */
                        unsigned u;
@@ -999,10 +988,10 @@ do_holes:
                                        page_cache_release(page);
                                        goto out;
                                }
-                               zero_user(page, block_in_page << blkbits,
-                                               1 << blkbits);
+                               zero_user(page, from, 1 << blkbits);
                                sdio->block_in_file++;
-                               block_in_page++;
+                               from += 1 << blkbits;
+                               dio->result += 1 << blkbits;
                                goto next_block;
                        }
 
@@ -1019,7 +1008,7 @@ do_holes:
                         * can add to this page
                         */
                        this_chunk_blocks = sdio->blocks_available;
-                       u = (PAGE_SIZE - offset_in_page) >> blkbits;
+                       u = (to - from) >> blkbits;
                        if (this_chunk_blocks > u)
                                this_chunk_blocks = u;
                        u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1031,7 +1020,7 @@ do_holes:
                        if (this_chunk_blocks == sdio->blocks_available)
                                sdio->boundary = buffer_boundary(map_bh);
                        ret = submit_page_section(dio, sdio, page,
-                                                 offset_in_page,
+                                                 from,
                                                  this_chunk_bytes,
                                                  sdio->next_block_for_io,
                                                  map_bh);
@@ -1042,7 +1031,8 @@ do_holes:
                        sdio->next_block_for_io += this_chunk_blocks;
 
                        sdio->block_in_file += this_chunk_blocks;
-                       block_in_page += this_chunk_blocks;
+                       from += this_chunk_bytes;
+                       dio->result += this_chunk_bytes;
                        sdio->blocks_available -= this_chunk_blocks;
 next_block:
                        BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
@@ -1052,7 +1042,6 @@ next_block:
 
                /* Drop the ref which was taken in get_user_pages() */
                page_cache_release(page);
-               block_in_page = 0;
        }
 out:
        return ret;
@@ -1107,24 +1096,20 @@ static inline int drop_refcount(struct dio *dio)
  */
 static inline ssize_t
 do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-       struct block_device *bdev, const struct iovec *iov, loff_t offset, 
-       unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+       struct block_device *bdev, struct iov_iter *iter, loff_t offset, 
+       get_block_t get_block, dio_iodone_t end_io,
        dio_submit_t submit_io, int flags)
 {
-       int seg;
-       size_t size;
-       unsigned long addr;
        unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
        unsigned blkbits = i_blkbits;
        unsigned blocksize_mask = (1 << blkbits) - 1;
        ssize_t retval = -EINVAL;
-       loff_t end = offset;
+       loff_t end = offset + iov_iter_count(iter);
        struct dio *dio;
        struct dio_submit sdio = { 0, };
-       unsigned long user_addr;
-       size_t bytes;
        struct buffer_head map_bh = { 0, };
        struct blk_plug plug;
+       unsigned long align = offset | iov_iter_alignment(iter);
 
        if (rw & WRITE)
                rw = WRITE_ODIRECT;
@@ -1134,32 +1119,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         * the early prefetch in the caller enough time.
         */
 
-       if (offset & blocksize_mask) {
+       if (align & blocksize_mask) {
                if (bdev)
                        blkbits = blksize_bits(bdev_logical_block_size(bdev));
                blocksize_mask = (1 << blkbits) - 1;
-               if (offset & blocksize_mask)
+               if (align & blocksize_mask)
                        goto out;
        }
 
-       /* Check the memory alignment.  Blocks cannot straddle pages */
-       for (seg = 0; seg < nr_segs; seg++) {
-               addr = (unsigned long)iov[seg].iov_base;
-               size = iov[seg].iov_len;
-               end += size;
-               if (unlikely((addr & blocksize_mask) ||
-                            (size & blocksize_mask))) {
-                       if (bdev)
-                               blkbits = blksize_bits(
-                                        bdev_logical_block_size(bdev));
-                       blocksize_mask = (1 << blkbits) - 1;
-                       if ((addr & blocksize_mask) || (size & blocksize_mask))
-                               goto out;
-               }
-       }
-
        /* watch out for a 0 len io from a tricksy fs */
-       if (rw == READ && end == offset)
+       if (rw == READ && !iov_iter_count(iter))
                return 0;
 
        dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1249,6 +1218,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        spin_lock_init(&dio->bio_lock);
        dio->refcount = 1;
 
+       sdio.iter = iter;
+       sdio.final_block_in_request =
+               (offset + iov_iter_count(iter)) >> blkbits;
+
        /*
         * In case of non-aligned buffers, we may need 2 more
         * pages since we need to zero out first and last block.
@@ -1256,47 +1229,13 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        if (unlikely(sdio.blkfactor))
                sdio.pages_in_io = 2;
 
-       for (seg = 0; seg < nr_segs; seg++) {
-               user_addr = (unsigned long)iov[seg].iov_base;
-               sdio.pages_in_io +=
-                       ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
-                               PAGE_SIZE - user_addr / PAGE_SIZE);
-       }
+       sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
 
        blk_start_plug(&plug);
 
-       for (seg = 0; seg < nr_segs; seg++) {
-               user_addr = (unsigned long)iov[seg].iov_base;
-               sdio.size += bytes = iov[seg].iov_len;
-
-               /* Index into the first page of the first block */
-               sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
-               sdio.final_block_in_request = sdio.block_in_file +
-                                               (bytes >> blkbits);
-               /* Page fetching state */
-               sdio.head = 0;
-               sdio.tail = 0;
-               sdio.curr_page = 0;
-
-               sdio.total_pages = 0;
-               if (user_addr & (PAGE_SIZE-1)) {
-                       sdio.total_pages++;
-                       bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
-               }
-               sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
-               sdio.curr_user_address = user_addr;
-
-               retval = do_direct_IO(dio, &sdio, &map_bh);
-
-               dio->result += iov[seg].iov_len -
-                       ((sdio.final_block_in_request - sdio.block_in_file) <<
-                                       blkbits);
-
-               if (retval) {
-                       dio_cleanup(dio, &sdio);
-                       break;
-               }
-       } /* end iovec loop */
+       retval = do_direct_IO(dio, &sdio, &map_bh);
+       if (retval)
+               dio_cleanup(dio, &sdio);
 
        if (retval == -ENOTBLK) {
                /*
@@ -1365,8 +1304,8 @@ out:
 
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-       struct block_device *bdev, const struct iovec *iov, loff_t offset,
-       unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+       struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+       get_block_t get_block, dio_iodone_t end_io,
        dio_submit_t submit_io, int flags)
 {
        /*
@@ -1381,9 +1320,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        prefetch(bdev->bd_queue);
        prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
-       return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-                                    nr_segs, get_block, end_io,
-                                    submit_io, flags);
+       return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
+                                    get_block, end_io, submit_io, flags);
 }
 
 EXPORT_SYMBOL(__blockdev_direct_IO);
index b1eaa7a1f82cd0ce03ebb0bad15c2d8f5b6f6e9c..db0fad3269c0395f230c39cc0fcd6c9975f6d633 100644 (file)
  * The function to be used for directory reads is ecryptfs_read.
  */
 static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
-                               const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos)
+                               struct iov_iter *to)
 {
        ssize_t rc;
        struct path *path;
        struct file *file = iocb->ki_filp;
 
-       rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
+       rc = generic_file_read_iter(iocb, to);
        /*
         * Even though this is a async interface, we need to wait
         * for IO to finish to update atime
@@ -352,10 +351,10 @@ const struct file_operations ecryptfs_dir_fops = {
 
 const struct file_operations ecryptfs_main_fops = {
        .llseek = generic_file_llseek,
-       .read = do_sync_read,
-       .aio_read = ecryptfs_read_update_atime,
-       .write = do_sync_write,
-       .aio_write = generic_file_aio_write,
+       .read = new_sync_read,
+       .read_iter = ecryptfs_read_update_atime,
+       .write = new_sync_write,
+       .write_iter = generic_file_write_iter,
        .iterate = ecryptfs_readdir,
        .unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
index 491c6c078e7f5e0ac420646288452d93cca86ce2..71bf8e4fb5d427c660e9913a57054dec59f99dfc 100644 (file)
@@ -67,17 +67,17 @@ static int exofs_flush(struct file *file, fl_owner_t id)
 
 const struct file_operations exofs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .release        = exofs_release_file,
        .fsync          = exofs_file_fsync,
        .flush          = exofs_flush,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 const struct inode_operations exofs_file_inode_operations = {
index d1c244d676679c8d087bafeada190ce268a02c6e..3f9cafd739312bd1392b21bc6ed3dc6bb9b666a1 100644 (file)
@@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
 
  /* TODO: Should be easy enough to do proprly */
 static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
-               const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+               struct iov_iter *iter, loff_t offset)
 {
        return 0;
 }
index 44c36e5907655982cc10e26b822cc1061312fbd1..7c87b22a7228c4ce9ed3c915c64283e2aa72a328 100644 (file)
@@ -62,10 +62,10 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  */
 const struct file_operations ext2_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext2_compat_ioctl,
@@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = {
        .release        = ext2_release_file,
        .fsync          = ext2_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
index b1d2a4675d4280e10ff10f9f89ffaa25d109d920..36d35c36311d69a025c5b804e8d8597cbd9cb2b2 100644 (file)
@@ -850,18 +850,18 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs)
+ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+                       loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                ext2_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
        if (ret < 0 && (rw & WRITE))
-               ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+               ext2_write_failed(mapping, offset + count);
        return ret;
 }
 
index aad05311392a046f3df724102580c511185c0fa2..a062fa1e1b113e56e7545bb4c6bac7ac1bf7d741 100644 (file)
@@ -50,10 +50,10 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 
 const struct file_operations ext3_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .unlocked_ioctl = ext3_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext3_compat_ioctl,
@@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = {
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 const struct inode_operations ext3_file_inode_operations = {
index 695abe738a2409f4c32f4ef7d5749757d98b6f15..2c6ccc49ba279cacf77fe6609fe44a50b970898c 100644 (file)
@@ -1821,8 +1821,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * VFS code falls back into buffered path in that case so we are safe.
  */
 static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
-                       const struct iovec *iov, loff_t offset,
-                       unsigned long nr_segs)
+                       struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
@@ -1830,10 +1829,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
        handle_t *handle;
        ssize_t ret;
        int orphan = 0;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        int retries = 0;
 
-       trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+       trace_ext3_direct_IO_enter(inode, offset, count, rw);
 
        if (rw == WRITE) {
                loff_t final_size = offset + count;
@@ -1857,15 +1856,14 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
        }
 
 retry:
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                ext3_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
        /*
         * In case of error extending write may have instantiated a few
         * blocks outside i_size. Trim these off again.
         */
        if (unlikely((rw & WRITE) && ret < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if (end > isize)
                        ext3_truncate_failed_direct_write(inode);
@@ -1910,8 +1908,7 @@ retry:
                        ret = err;
        }
 out:
-       trace_ext3_direct_IO_exit(inode, offset,
-                               iov_length(iov, nr_segs), rw, ret);
+       trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
        return ret;
 }
 
index 1479e2ae00d28e83e8d1c175752a61b59828cd55..7cc5a0e23688e1a2ce071dcb646b725a266ff890 100644 (file)
@@ -2140,8 +2140,7 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
                                struct ext4_map_blocks *map, int flags);
 extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
-                               const struct iovec *iov, loff_t offset,
-                               unsigned long nr_segs);
+                               struct iov_iter *iter, loff_t offset);
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
 extern void ext4_ind_truncate(handle_t *, struct inode *inode);
index 4e8bc284ec0e96296e8bbcf68423b9ea9ee8c921..8695f70af1ef2046c2f68a24a5ed4e195cd6dc88 100644 (file)
@@ -74,26 +74,22 @@ static void ext4_unwritten_wait(struct inode *inode)
  * or one thread will zero the other's data, causing corruption.
  */
 static int
-ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
-                  unsigned long nr_segs, loff_t pos)
+ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
 {
        struct super_block *sb = inode->i_sb;
        int blockmask = sb->s_blocksize - 1;
-       size_t count = iov_length(iov, nr_segs);
-       loff_t final_size = pos + count;
 
        if (pos >= i_size_read(inode))
                return 0;
 
-       if ((pos & blockmask) || (final_size & blockmask))
+       if ((pos | iov_iter_alignment(from)) & blockmask)
                return 1;
 
        return 0;
 }
 
 static ssize_t
-ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(iocb->ki_filp);
@@ -101,10 +97,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
        struct blk_plug plug;
        int o_direct = file->f_flags & O_DIRECT;
        int overwrite = 0;
-       size_t length = iov_length(iov, nr_segs);
+       size_t length = iov_iter_count(from);
        ssize_t ret;
-
-       BUG_ON(iocb->ki_pos != pos);
+       loff_t pos = iocb->ki_pos;
 
        /*
         * Unaligned direct AIO must be serialized; see comment above
@@ -114,7 +109,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
            ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
            !is_sync_kiocb(iocb) &&
            (file->f_flags & O_APPEND ||
-            ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
+            ext4_unaligned_aio(inode, from, pos))) {
                aio_mutex = ext4_aio_mutex(inode);
                mutex_lock(aio_mutex);
                ext4_unwritten_wait(inode);
@@ -138,10 +133,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                        goto errout;
                }
 
-               if (pos + length > sbi->s_bitmap_maxbytes) {
-                       nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
-                                             sbi->s_bitmap_maxbytes - pos);
-               }
+               if (pos + length > sbi->s_bitmap_maxbytes)
+                       iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
        }
 
        if (o_direct) {
@@ -179,7 +172,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                }
        }
 
-       ret = __generic_file_aio_write(iocb, iov, nr_segs);
+       ret = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);
 
        if (ret > 0) {
@@ -594,10 +587,10 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 
 const struct file_operations ext4_file_operations = {
        .llseek         = ext4_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = ext4_file_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = ext4_file_write_iter,
        .unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ext4_compat_ioctl,
@@ -607,7 +600,7 @@ const struct file_operations ext4_file_operations = {
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = ext4_fallocate,
 };
 
index 594009f5f523f0fd2228a72f1aa593a6f3ebf66f..8a57e9fcd1b987bdab029e7658ae100d10949d5a 100644 (file)
@@ -639,8 +639,7 @@ out:
  * VFS code falls back into buffered path in that case so we are safe.
  */
 ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
-                          const struct iovec *iov, loff_t offset,
-                          unsigned long nr_segs)
+                          struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
@@ -648,7 +647,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
        handle_t *handle;
        ssize_t ret;
        int orphan = 0;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        int retries = 0;
 
        if (rw == WRITE) {
@@ -687,18 +686,17 @@ retry:
                        goto locked;
                }
                ret = __blockdev_direct_IO(rw, iocb, inode,
-                                inode->i_sb->s_bdev, iov,
-                                offset, nr_segs,
+                                inode->i_sb->s_bdev, iter, offset,
                                 ext4_get_block, NULL, NULL, 0);
                inode_dio_done(inode);
        } else {
 locked:
-               ret = blockdev_direct_IO(rw, iocb, inode, iov,
-                                offset, nr_segs, ext4_get_block);
+               ret = blockdev_direct_IO(rw, iocb, inode, iter,
+                                offset, ext4_get_block);
 
                if (unlikely((rw & WRITE) && ret < 0)) {
                        loff_t isize = i_size_read(inode);
-                       loff_t end = offset + iov_length(iov, nr_segs);
+                       loff_t end = offset + count;
 
                        if (end > isize)
                                ext4_truncate_failed_write(inode);
index 7fcd68ee915500cd53ef79cbea1c187e71024004..8a064734e6eb3ed06461e9954d036da6ff1e8147 100644 (file)
@@ -3093,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
  *
  */
 static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        int overwrite = 0;
        get_block_t *get_block_func = NULL;
        int dio_flags = 0;
@@ -3108,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 
        /* Use the old path for reads and writes beyond i_size. */
        if (rw != WRITE || final_size > inode->i_size)
-               return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               return ext4_ind_direct_IO(rw, iocb, iter, offset);
 
        BUG_ON(iocb->private == NULL);
 
@@ -3175,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                dio_flags = DIO_LOCKING;
        }
        ret = __blockdev_direct_IO(rw, iocb, inode,
-                                  inode->i_sb->s_bdev, iov,
-                                  offset, nr_segs,
+                                  inode->i_sb->s_bdev, iter,
+                                  offset,
                                   get_block_func,
                                   ext4_end_io_dio,
                                   NULL,
@@ -3230,11 +3229,11 @@ retake_lock:
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        /*
@@ -3247,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        if (ext4_has_inline_data(inode))
                return 0;
 
-       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+       trace_ext4_direct_IO_enter(inode, offset, count, rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
        else
-               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
-       trace_ext4_direct_IO_exit(inode, offset,
-                               iov_length(iov, nr_segs), rw, ret);
+               ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
+       trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
        return ret;
 }
 
index c1fb6dd10911c01e9b37d533a7588ee6bf934ecb..0924521306b40c5087f2c2170c92fe7b03452862 100644 (file)
@@ -1017,10 +1017,9 @@ static int f2fs_write_end(struct file *file,
 }
 
 static int check_direct_IO(struct inode *inode, int rw,
-               const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+               struct iov_iter *iter, loff_t offset)
 {
        unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
-       int i;
 
        if (rw == READ)
                return 0;
@@ -1028,14 +1027,14 @@ static int check_direct_IO(struct inode *inode, int rw,
        if (offset & blocksize_mask)
                return -EINVAL;
 
-       for (i = 0; i < nr_segs; i++)
-               if (iov[i].iov_len & blocksize_mask)
-                       return -EINVAL;
+       if (iov_iter_alignment(iter) & blocksize_mask)
+               return -EINVAL;
+
        return 0;
 }
 
 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
-               const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+               struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
@@ -1044,14 +1043,14 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
        if (f2fs_has_inline_data(inode))
                return 0;
 
-       if (check_direct_IO(inode, rw, iov, offset, nr_segs))
+       if (check_direct_IO(inode, rw, iter, offset))
                return 0;
 
        /* clear fsync mark to recover these blocks */
        fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
 
-       return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                                       get_data_block);
+       return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+                                 get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
index 9c49c593d8eb4ab39a1aa28c1b841f949d02c050..c58e330757191392656d2819fd937a1cc564cb37 100644 (file)
@@ -808,10 +808,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 const struct file_operations f2fs_file_operations = {
        .llseek         = f2fs_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .open           = generic_file_open,
        .mmap           = f2fs_file_mmap,
        .fsync          = f2fs_sync_file,
@@ -821,5 +821,5 @@ const struct file_operations f2fs_file_operations = {
        .compat_ioctl   = f2fs_compat_ioctl,
 #endif
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
index 9b104f543056238016c683ef822046a784169f50..85f79a89e7474658c8c552cfe027e72ff048d542 100644 (file)
@@ -170,10 +170,10 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 
 const struct file_operations fat_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .release        = fat_file_release,
        .unlocked_ioctl = fat_generic_ioctl,
index 9c83594d7fb5dbb03e5eb7ca378c4b35ffd3d85f..756aead10d9618593e3267e697df4915d528fbc9 100644 (file)
@@ -247,12 +247,13 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
-                            const struct iovec *iov,
-                            loff_t offset, unsigned long nr_segs)
+                            struct iov_iter *iter,
+                            loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
        if (rw == WRITE) {
@@ -265,7 +266,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
                 *
                 * Return 0, and fallback to normal buffered write.
                 */
-               loff_t size = offset + iov_length(iov, nr_segs);
+               loff_t size = offset + count;
                if (MSDOS_I(inode)->mmu_private < size)
                        return 0;
        }
@@ -274,10 +275,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
         * FAT need to use the DIO_LOCKING for avoiding the race
         * condition of fat_get_block() and ->truncate().
         */
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                fat_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
        if (ret < 0 && (rw & WRITE))
-               fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+               fat_write_failed(mapping, offset + count);
 
        return ret;
 }
index 8f294cfac69749024c2c2e19d4b156755130e9ed..66923fe3176e49b03617e2f5477bb8e05fc89dc3 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -44,15 +44,10 @@ static void *alloc_fdmem(size_t size)
        return vmalloc(size);
 }
 
-static void free_fdmem(void *ptr)
-{
-       is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
-}
-
 static void __free_fdtable(struct fdtable *fdt)
 {
-       free_fdmem(fdt->fd);
-       free_fdmem(fdt->open_fds);
+       kvfree(fdt->fd);
+       kvfree(fdt->open_fds);
        kfree(fdt);
 }
 
@@ -130,7 +125,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
        return fdt;
 
 out_arr:
-       free_fdmem(fdt->fd);
+       kvfree(fdt->fd);
 out_fdt:
        kfree(fdt);
 out:
index 40bf4660f0a3aa18bf881c2b3c09dea95e0808ec..385bfd31512a17f4e4c6869a3ee8f32c456cd327 100644 (file)
@@ -175,6 +175,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
        file->f_path = *path;
        file->f_inode = path->dentry->d_inode;
        file->f_mapping = path->dentry->d_inode->i_mapping;
+       if ((mode & FMODE_READ) &&
+            likely(fop->read || fop->aio_read || fop->read_iter))
+               mode |= FMODE_CAN_READ;
+       if ((mode & FMODE_WRITE) &&
+            likely(fop->write || fop->aio_write || fop->write_iter))
+               mode |= FMODE_CAN_WRITE;
        file->f_mode = mode;
        file->f_op = fop;
        if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
index 13b691a8a7d2ea4403161213486538dadf75a217..966ace8b243fa39796fbdd788c4d83ebd6024217 100644 (file)
@@ -94,8 +94,10 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
        loff_t pos = 0;
        struct iovec iov = { .iov_base = buf, .iov_len = count };
        struct fuse_io_priv io = { .async = 0, .file = file };
+       struct iov_iter ii;
+       iov_iter_init(&ii, READ, &iov, 1, count);
 
-       return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE);
+       return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
 }
 
 static ssize_t cuse_write(struct file *file, const char __user *buf,
@@ -104,12 +106,14 @@ static ssize_t cuse_write(struct file *file, const char __user *buf,
        loff_t pos = 0;
        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
        struct fuse_io_priv io = { .async = 0, .file = file };
+       struct iov_iter ii;
+       iov_iter_init(&ii, WRITE, &iov, 1, count);
 
        /*
         * No locking or generic_write_checks(), the server is
         * responsible for locking and sanity checks.
         */
-       return fuse_direct_io(&io, &iov, 1, count, &pos,
+       return fuse_direct_io(&io, &ii, &pos,
                              FUSE_DIO_WRITE | FUSE_DIO_CUSE);
 }
 
index 903cbc9cd6bd3a471f565e9fd3e2115539b58aca..6e16dad13e9b16de0358f8caaec9833d9f00a84b 100644 (file)
@@ -933,8 +933,7 @@ out:
        return err;
 }
 
-static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-                                 unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -945,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
         * i_size is up to date).
         */
        if (fc->auto_inval_data ||
-           (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
+           (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
                int err;
                err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
                if (err)
                        return err;
        }
 
-       return generic_file_aio_read(iocb, iov, nr_segs, pos);
+       return generic_file_read_iter(iocb, to);
 }
 
 static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1181,19 +1180,17 @@ static ssize_t fuse_perform_write(struct file *file,
        return res > 0 ? res : err;
 }
 
-static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                  unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
-       size_t count = 0;
-       size_t ocount = 0;
+       size_t count = iov_iter_count(from);
        ssize_t written = 0;
        ssize_t written_buffered = 0;
        struct inode *inode = mapping->host;
        ssize_t err;
-       struct iov_iter i;
        loff_t endbyte = 0;
+       loff_t pos = iocb->ki_pos;
 
        if (get_fuse_conn(inode)->writeback_cache) {
                /* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1201,17 +1198,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                if (err)
                        return err;
 
-               return generic_file_aio_write(iocb, iov, nr_segs, pos);
+               return generic_file_write_iter(iocb, from);
        }
 
-       WARN_ON(iocb->ki_pos != pos);
-
-       ocount = 0;
-       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-       if (err)
-               return err;
-
-       count = ocount;
        mutex_lock(&inode->i_mutex);
 
        /* We can write back this queue in page reclaim */
@@ -1224,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (count == 0)
                goto out;
 
+       iov_iter_truncate(from, count);
        err = file_remove_suid(file);
        if (err)
                goto out;
@@ -1233,16 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                goto out;
 
        if (file->f_flags & O_DIRECT) {
-               written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
-                                                   count, ocount);
-               if (written < 0 || written == count)
+               written = generic_file_direct_write(iocb, from, pos);
+               if (written < 0 || !iov_iter_count(from))
                        goto out;
 
                pos += written;
-               count -= written;
 
-               iov_iter_init(&i, iov, nr_segs, count, written);
-               written_buffered = fuse_perform_write(file, mapping, &i, pos);
+               written_buffered = fuse_perform_write(file, mapping, from, pos);
                if (written_buffered < 0) {
                        err = written_buffered;
                        goto out;
@@ -1261,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                written += written_buffered;
                iocb->ki_pos = pos + written_buffered;
        } else {
-               iov_iter_init(&i, iov, nr_segs, count, 0);
-               written = fuse_perform_write(file, mapping, &i, pos);
+               written = fuse_perform_write(file, mapping, from, pos);
                if (written >= 0)
                        iocb->ki_pos = pos + written;
        }
@@ -1300,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
        size_t nbytes = 0;  /* # bytes already packed in req */
 
        /* Special case for kernel I/O: can copy directly into the buffer */
-       if (segment_eq(get_fs(), KERNEL_DS)) {
+       if (ii->type & ITER_KVEC) {
                unsigned long user_addr = fuse_get_user_addr(ii);
                size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
@@ -1316,35 +1302,26 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
        while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
                unsigned npages;
-               unsigned long user_addr = fuse_get_user_addr(ii);
-               unsigned offset = user_addr & ~PAGE_MASK;
-               size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
-               int ret;
-
+               size_t start;
                unsigned n = req->max_pages - req->num_pages;
-               frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
-
-               npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               npages = clamp(npages, 1U, n);
-
-               ret = get_user_pages_fast(user_addr, npages, !write,
-                                         &req->pages[req->num_pages]);
+               ssize_t ret = iov_iter_get_pages(ii,
+                                       &req->pages[req->num_pages],
+                                       n * PAGE_SIZE, &start);
                if (ret < 0)
                        return ret;
 
-               npages = ret;
-               frag_size = min_t(size_t, frag_size,
-                                 (npages << PAGE_SHIFT) - offset);
-               iov_iter_advance(ii, frag_size);
+               iov_iter_advance(ii, ret);
+               nbytes += ret;
+
+               ret += start;
+               npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
 
-               req->page_descs[req->num_pages].offset = offset;
+               req->page_descs[req->num_pages].offset = start;
                fuse_page_descs_length_init(req, req->num_pages, npages);
 
                req->num_pages += npages;
                req->page_descs[req->num_pages - 1].length -=
-                       (npages << PAGE_SHIFT) - offset - frag_size;
-
-               nbytes += frag_size;
+                       (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
        }
 
        if (write)
@@ -1359,24 +1336,11 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 static inline int fuse_iter_npages(const struct iov_iter *ii_p)
 {
-       struct iov_iter ii = *ii_p;
-       int npages = 0;
-
-       while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
-               unsigned long user_addr = fuse_get_user_addr(&ii);
-               unsigned offset = user_addr & ~PAGE_MASK;
-               size_t frag_size = iov_iter_single_seg_count(&ii);
-
-               npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               iov_iter_advance(&ii, frag_size);
-       }
-
-       return min(npages, FUSE_MAX_PAGES_PER_REQ);
+       return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
 }
 
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
-                      unsigned long nr_segs, size_t count, loff_t *ppos,
-                      int flags)
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+                      loff_t *ppos, int flags)
 {
        int write = flags & FUSE_DIO_WRITE;
        int cuse = flags & FUSE_DIO_CUSE;
@@ -1386,18 +1350,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
        struct fuse_conn *fc = ff->fc;
        size_t nmax = write ? fc->max_write : fc->max_read;
        loff_t pos = *ppos;
+       size_t count = iov_iter_count(iter);
        pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
        pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        ssize_t res = 0;
        struct fuse_req *req;
-       struct iov_iter ii;
-
-       iov_iter_init(&ii, iov, nr_segs, count, 0);
 
        if (io->async)
-               req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+               req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
        else
-               req = fuse_get_req(fc, fuse_iter_npages(&ii));
+               req = fuse_get_req(fc, fuse_iter_npages(iter));
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -1413,7 +1375,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                size_t nres;
                fl_owner_t owner = current->files;
                size_t nbytes = min(count, nmax);
-               int err = fuse_get_user_pages(req, &ii, &nbytes, write);
+               int err = fuse_get_user_pages(req, iter, &nbytes, write);
                if (err) {
                        res = err;
                        break;
@@ -1443,9 +1405,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                        fuse_put_request(fc, req);
                        if (io->async)
                                req = fuse_get_req_for_background(fc,
-                                       fuse_iter_npages(&ii));
+                                       fuse_iter_npages(iter));
                        else
-                               req = fuse_get_req(fc, fuse_iter_npages(&ii));
+                               req = fuse_get_req(fc, fuse_iter_npages(iter));
                        if (IS_ERR(req))
                                break;
                }
@@ -1460,9 +1422,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
 EXPORT_SYMBOL_GPL(fuse_direct_io);
 
 static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
-                                 const struct iovec *iov,
-                                 unsigned long nr_segs, loff_t *ppos,
-                                 size_t count)
+                                 struct iov_iter *iter,
+                                 loff_t *ppos)
 {
        ssize_t res;
        struct file *file = io->file;
@@ -1471,7 +1432,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
        if (is_bad_inode(inode))
                return -EIO;
 
-       res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0);
+       res = fuse_direct_io(io, iter, ppos, 0);
 
        fuse_invalidate_attr(inode);
 
@@ -1483,22 +1444,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 {
        struct fuse_io_priv io = { .async = 0, .file = file };
        struct iovec iov = { .iov_base = buf, .iov_len = count };
-       return __fuse_direct_read(&io, &iov, 1, ppos, count);
+       struct iov_iter ii;
+       iov_iter_init(&ii, READ, &iov, 1, count);
+       return __fuse_direct_read(&io, &ii, ppos);
 }
 
 static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
-                                  const struct iovec *iov,
-                                  unsigned long nr_segs, loff_t *ppos)
+                                  struct iov_iter *iter,
+                                  loff_t *ppos)
 {
        struct file *file = io->file;
        struct inode *inode = file_inode(file);
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        ssize_t res;
 
+
        res = generic_write_checks(file, ppos, &count, 0);
-       if (!res)
-               res = fuse_direct_io(io, iov, nr_segs, count, ppos,
-                                    FUSE_DIO_WRITE);
+       if (!res) {
+               iov_iter_truncate(iter, count);
+               res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
+       }
 
        fuse_invalidate_attr(inode);
 
@@ -1512,13 +1477,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
        struct inode *inode = file_inode(file);
        ssize_t res;
        struct fuse_io_priv io = { .async = 0, .file = file };
+       struct iov_iter ii;
+       iov_iter_init(&ii, WRITE, &iov, 1, count);
 
        if (is_bad_inode(inode))
                return -EIO;
 
        /* Don't allow parallel writes to the same file */
        mutex_lock(&inode->i_mutex);
-       res = __fuse_direct_write(&io, &iov, 1, ppos);
+       res = __fuse_direct_write(&io, &ii, ppos);
        if (res > 0)
                fuse_write_update_size(inode, *ppos);
        mutex_unlock(&inode->i_mutex);
@@ -2372,7 +2339,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
        if (!bytes)
                return 0;
 
-       iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+       iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
 
        while (iov_iter_count(&ii)) {
                struct page *page = pages[page_idx++];
@@ -2894,8 +2861,8 @@ static inline loff_t fuse_round_up(loff_t off)
 }
 
 static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+                       loff_t offset)
 {
        ssize_t ret = 0;
        struct file *file = iocb->ki_filp;
@@ -2904,7 +2871,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
        loff_t pos = 0;
        struct inode *inode;
        loff_t i_size;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        struct fuse_io_priv *io;
 
        pos = offset;
@@ -2919,6 +2886,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                if (offset >= i_size)
                        return 0;
                count = min_t(loff_t, count, fuse_round_up(i_size - offset));
+               iov_iter_truncate(iter, count);
        }
 
        io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2948,9 +2916,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                io->async = false;
 
        if (rw == WRITE)
-               ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+               ret = __fuse_direct_write(io, iter, &pos);
        else
-               ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+               ret = __fuse_direct_read(io, iter, &pos);
 
        if (io->async) {
                fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -3061,10 +3029,10 @@ out:
 
 static const struct file_operations fuse_file_operations = {
        .llseek         = fuse_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = fuse_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = fuse_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = fuse_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = fuse_file_write_iter,
        .mmap           = fuse_file_mmap,
        .open           = fuse_open,
        .flush          = fuse_flush,
index 7aa5c75e0de13dcc9728ba890983e842c1a98554..e8e47a6ab5186be8df5889d65df28bc92bf8c97d 100644 (file)
@@ -880,9 +880,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */
 #define FUSE_DIO_CUSE  (1 << 1)
 
-ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
-                      unsigned long nr_segs, size_t count, loff_t *ppos,
-                      int flags);
+ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
+                      loff_t *ppos, int flags);
 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
                   unsigned int flags);
 long fuse_ioctl_common(struct file *file, unsigned int cmd,
index 492123cda64ab5d325db6a640d29d7640eeb6f10..805b37fed6383fc71abcb573de809ee8f3e41c53 100644 (file)
@@ -1040,8 +1040,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
 
 
 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
-                             const struct iovec *iov, loff_t offset,
-                             unsigned long nr_segs)
+                             struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
@@ -1081,7 +1080,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
         */
        if (mapping->nrpages) {
                loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
-               loff_t len = iov_length(iov, nr_segs);
+               loff_t len = iov_iter_count(iter);
                loff_t end = PAGE_ALIGN(offset + len) - 1;
 
                rv = 0;
@@ -1096,9 +1095,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
                        truncate_inode_pages_range(mapping, lstart, end);
        }
 
-       rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-                                 offset, nr_segs, gfs2_get_block_direct,
-                                 NULL, NULL, 0);
+       rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+                                 iter, offset,
+                                 gfs2_get_block_direct, NULL, NULL, 0);
 out:
        gfs2_glock_dq(&gh);
        gfs2_holder_uninit(&gh);
index 6ab0cfb2e891014436816e7ce2021a745291d2d6..4fc3a3046174dc9a296c90a0d0ca6d53485e277b 100644 (file)
@@ -684,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 }
 
 /**
- * gfs2_file_aio_write - Perform a write to a file
+ * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
  * @iov: The data to write
  * @nr_segs: Number of @iov segments
@@ -697,11 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
  *
  */
 
-static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                  unsigned long nr_segs, loff_t pos)
+static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
-       size_t writesize = iov_length(iov, nr_segs);
        struct gfs2_inode *ip = GFS2_I(file_inode(file));
        int ret;
 
@@ -709,7 +707,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (ret)
                return ret;
 
-       gfs2_size_hint(file, pos, writesize);
+       gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
 
        if (file->f_flags & O_APPEND) {
                struct gfs2_holder gh;
@@ -720,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                gfs2_glock_dq_uninit(&gh);
        }
 
-       return generic_file_aio_write(iocb, iov, nr_segs, pos);
+       return generic_file_write_iter(iocb, from);
 }
 
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1058,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 const struct file_operations gfs2_file_fops = {
        .llseek         = gfs2_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = gfs2_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
        .open           = gfs2_open,
@@ -1070,7 +1068,7 @@ const struct file_operations gfs2_file_fops = {
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .setlease       = gfs2_setlease,
        .fallocate      = gfs2_fallocate,
 };
@@ -1090,17 +1088,17 @@ const struct file_operations gfs2_dir_fops = {
 
 const struct file_operations gfs2_file_fops_nolock = {
        .llseek         = gfs2_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = gfs2_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = gfs2_file_write_iter,
        .unlocked_ioctl = gfs2_ioctl,
        .mmap           = gfs2_mmap,
        .open           = gfs2_open,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
 };
index 9e2fecd62f6245b1a48afe12aac49c5d2573fbf2..d0929bc817826e012cc829bb0f021832eea24379 100644 (file)
@@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
-               const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+               struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = file_inode(file)->i_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                hfs_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
 
        /*
         * In case of error extending write may have instantiated a few
@@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
         */
        if (unlikely((rw & WRITE) && ret < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if (end > isize)
                        hfs_write_failed(mapping, end);
@@ -674,10 +674,10 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 
 static const struct file_operations hfs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
        .fsync          = hfs_file_fsync,
index a4f45bd88a631ad5a153a8f45fdd1044a3637c1c..0cf786f2d046f9fbae9b110a2a2d212c008fb3aa 100644 (file)
@@ -123,14 +123,15 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
-               const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+               struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = file_inode(file)->i_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 
                                 hfsplus_get_block);
 
        /*
@@ -139,7 +140,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
         */
        if (unlikely((rw & WRITE) && ret < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if (end > isize)
                        hfsplus_write_failed(mapping, end);
@@ -340,10 +341,10 @@ static const struct inode_operations hfsplus_file_inode_operations = {
 
 static const struct file_operations hfsplus_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
        .fsync          = hfsplus_file_fsync,
index 9c470fde9878eae280eeb62b7ef9ca3c263c9b4c..bb529f3b7f2bf8a119ec5f2eba3c260e09c361c7 100644 (file)
@@ -378,11 +378,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
 
 static const struct file_operations hostfs_file_fops = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
+       .read           = new_sync_read,
        .splice_read    = generic_file_splice_read,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
-       .write          = do_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
+       .write          = new_sync_write,
        .mmap           = generic_file_mmap,
        .open           = hostfs_file_open,
        .release        = hostfs_file_release,
index 67c1a61e09558e0bb632638b0f65d8316ab9b5f2..7f54e5f76cececd4bf76354edb13eb246a81df78 100644 (file)
@@ -197,10 +197,10 @@ const struct address_space_operations hpfs_aops = {
 const struct file_operations hpfs_file_ops =
 {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .release        = hpfs_file_release,
        .fsync          = hpfs_file_fsync,
index 256cd19a3b78c006a1439f893b1b51a0341c938a..64989ca9ba90b71e3a8d16ff90dda67a913cc988 100644 (file)
@@ -51,10 +51,10 @@ const struct file_operations jffs2_file_operations =
 {
        .llseek =       generic_file_llseek,
        .open =         generic_file_open,
-       .read =         do_sync_read,
-       .aio_read =     generic_file_aio_read,
-       .write =        do_sync_write,
-       .aio_write =    generic_file_aio_write,
+       .read =         new_sync_read,
+       .read_iter =    generic_file_read_iter,
+       .write =        new_sync_write,
+       .write_iter =   generic_file_write_iter,
        .unlocked_ioctl=jffs2_ioctl,
        .mmap =         generic_file_readonly_mmap,
        .fsync =        jffs2_fsync,
index 794da944d5cd29c63d8db31040340e83d7079d87..33aa0cc1f8b863b7b101bec470af4238d0c6eeb1 100644 (file)
@@ -151,13 +151,13 @@ const struct inode_operations jfs_file_inode_operations = {
 const struct file_operations jfs_file_operations = {
        .open           = jfs_open,
        .llseek         = generic_file_llseek,
-       .write          = do_sync_write,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .write          = new_sync_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fsync          = jfs_fsync,
        .release        = jfs_release,
        .unlocked_ioctl = jfs_ioctl,
index 6f8fe72c2a7ae201be639a94e0aca529e9270019..bd3df1ca3c9b7f955571c056f86f98e97beda7b9 100644 (file)
@@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
-       const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+       struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                jfs_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
 
        /*
         * In case of error extending write may have instantiated a few
@@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
         */
        if (unlikely((rw & WRITE) && ret < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if (end > isize)
                        jfs_write_failed(mapping, end);
index 57914fc32b62538f43909d35ffc031742b98a881..8538752df2f6a7dbb3dad0119e4e8bc47f4a25f4 100644 (file)
@@ -264,15 +264,15 @@ const struct inode_operations logfs_reg_iops = {
 };
 
 const struct file_operations logfs_reg_fops = {
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .fsync          = logfs_fsync,
        .unlocked_ioctl = logfs_ioctl,
        .llseek         = generic_file_llseek,
        .mmap           = generic_file_readonly_mmap,
        .open           = generic_file_open,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
 };
 
 const struct address_space_operations logfs_reg_aops = {
index adc6f5494231bc947f45d8a3c526db0b36f39bf3..a967de085ac0f4cf7193101cd4e54a08bb4fff50 100644 (file)
  */
 const struct file_operations minix_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .fsync          = generic_file_fsync,
        .splice_read    = generic_file_splice_read,
index 4ad7bc3886791b0078ebc3ae4b326ed5e4c6566b..8f98138cbc4385ba63b3af77ae907219d22e6991 100644 (file)
@@ -212,20 +212,20 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
  * shunt off direct read and write requests before the VFS gets them,
  * so this method is only ever called for swap.
  */
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 #ifndef CONFIG_NFS_SWAP
        dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-                       iocb->ki_filp, (long long) pos, nr_segs);
+                       iocb->ki_filp, (long long) pos, iter->nr_segs);
 
        return -EINVAL;
 #else
        VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
        if (rw == READ || rw == KERNEL_READ)
-               return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+               return nfs_file_direct_read(iocb, iter, pos,
                                rw == READ ? true : false);
-       return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+       return nfs_file_direct_write(iocb, iter, pos,
                                rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
@@ -414,60 +414,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-                                               const struct iovec *iov,
-                                               loff_t pos, bool uio)
-{
-       struct nfs_direct_req *dreq = desc->pg_dreq;
-       struct nfs_open_context *ctx = dreq->ctx;
-       struct inode *inode = ctx->dentry->d_inode;
-       unsigned long user_addr = (unsigned long)iov->iov_base;
-       size_t count = iov->iov_len;
-       size_t rsize = NFS_SERVER(inode)->rsize;
-       unsigned int pgbase;
-       int result;
-       ssize_t started = 0;
-       struct page **pagevec = NULL;
-       unsigned int npages;
-
-       do {
-               size_t bytes;
-               int i;
 
-               pgbase = user_addr & ~PAGE_MASK;
-               bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+                                             struct iov_iter *iter,
+                                             loff_t pos)
+{
+       struct nfs_pageio_descriptor desc;
+       struct inode *inode = dreq->inode;
+       ssize_t result = -EINVAL;
+       size_t requested_bytes = 0;
+       size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-               result = -ENOMEM;
-               npages = nfs_page_array_len(pgbase, bytes);
-               if (!pagevec)
-                       pagevec = kmalloc(npages * sizeof(struct page *),
-                                         GFP_KERNEL);
-               if (!pagevec)
-                       break;
-               if (uio) {
-                       down_read(&current->mm->mmap_sem);
-                       result = get_user_pages(current, current->mm, user_addr,
-                                       npages, 1, 0, pagevec, NULL);
-                       up_read(&current->mm->mmap_sem);
-                       if (result < 0)
-                               break;
-               } else {
-                       WARN_ON(npages != 1);
-                       result = get_kernel_page(user_addr, 1, pagevec);
-                       if (WARN_ON(result != 1))
-                               break;
-               }
+       nfs_pageio_init_read(&desc, dreq->inode, false,
+                            &nfs_direct_read_completion_ops);
+       get_dreq(dreq);
+       desc.pg_dreq = dreq;
+       atomic_inc(&inode->i_dio_count);
 
-               if ((unsigned)result < npages) {
-                       bytes = result * PAGE_SIZE;
-                       if (bytes <= pgbase) {
-                               nfs_direct_release_pages(pagevec, result);
-                               break;
-                       }
-                       bytes -= pgbase;
-                       npages = result;
-               }
+       while (iov_iter_count(iter)) {
+               struct page **pagevec;
+               size_t bytes;
+               size_t pgbase;
+               unsigned npages, i;
 
+               result = iov_iter_get_pages_alloc(iter, &pagevec,
+                                                 rsize, &pgbase);
+               if (result < 0)
+                       break;
+
+               bytes = result;
+               iov_iter_advance(iter, bytes);
+               npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
                for (i = 0; i < npages; i++) {
                        struct nfs_page *req;
                        unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -480,56 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
                        }
                        req->wb_index = pos >> PAGE_SHIFT;
                        req->wb_offset = pos & ~PAGE_MASK;
-                       if (!nfs_pageio_add_request(desc, req)) {
-                               result = desc->pg_error;
+                       if (!nfs_pageio_add_request(&desc, req)) {
+                               result = desc.pg_error;
                                nfs_release_request(req);
                                break;
                        }
                        pgbase = 0;
                        bytes -= req_len;
-                       started += req_len;
-                       user_addr += req_len;
+                       requested_bytes += req_len;
                        pos += req_len;
-                       count -= req_len;
                        dreq->bytes_left -= req_len;
                }
-               /* The nfs_page now hold references to these pages */
                nfs_direct_release_pages(pagevec, npages);
-       } while (count != 0 && result >= 0);
-
-       kfree(pagevec);
-
-       if (started)
-               return started;
-       return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-                                             const struct iovec *iov,
-                                             unsigned long nr_segs,
-                                             loff_t pos, bool uio)
-{
-       struct nfs_pageio_descriptor desc;
-       struct inode *inode = dreq->inode;
-       ssize_t result = -EINVAL;
-       size_t requested_bytes = 0;
-       unsigned long seg;
-
-       nfs_pageio_init_read(&desc, dreq->inode, false,
-                            &nfs_direct_read_completion_ops);
-       get_dreq(dreq);
-       desc.pg_dreq = dreq;
-       atomic_inc(&inode->i_dio_count);
-
-       for (seg = 0; seg < nr_segs; seg++) {
-               const struct iovec *vec = &iov[seg];
-               result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+               kvfree(pagevec);
                if (result < 0)
                        break;
-               requested_bytes += result;
-               if ((size_t)result < vec->iov_len)
-                       break;
-               pos += vec->iov_len;
        }
 
        nfs_pageio_complete(&desc);
@@ -552,8 +494,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers into which to read data
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
@@ -570,8 +511,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+                               loff_t pos, bool uio)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
@@ -579,9 +520,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
        struct nfs_direct_req *dreq;
        struct nfs_lock_context *l_ctx;
        ssize_t result = -EINVAL;
-       size_t count;
-
-       count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
        dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -604,7 +543,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
                goto out_unlock;
 
        dreq->inode = inode;
-       dreq->bytes_left = iov_length(iov, nr_segs);
+       dreq->bytes_left = count;
        dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
        l_ctx = nfs_get_lock_context(dreq->ctx);
        if (IS_ERR(l_ctx)) {
@@ -615,8 +554,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
 
-       NFS_I(inode)->read_io += iov_length(iov, nr_segs);
-       result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+       NFS_I(inode)->read_io += count;
+       result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
        mutex_unlock(&inode->i_mutex);
 
@@ -772,108 +711,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-                                                const struct iovec *iov,
-                                                loff_t pos, bool uio)
-{
-       struct nfs_direct_req *dreq = desc->pg_dreq;
-       struct nfs_open_context *ctx = dreq->ctx;
-       struct inode *inode = ctx->dentry->d_inode;
-       unsigned long user_addr = (unsigned long)iov->iov_base;
-       size_t count = iov->iov_len;
-       size_t wsize = NFS_SERVER(inode)->wsize;
-       unsigned int pgbase;
-       int result;
-       ssize_t started = 0;
-       struct page **pagevec = NULL;
-       unsigned int npages;
-
-       do {
-               size_t bytes;
-               int i;
-
-               pgbase = user_addr & ~PAGE_MASK;
-               bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-               result = -ENOMEM;
-               npages = nfs_page_array_len(pgbase, bytes);
-               if (!pagevec)
-                       pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-               if (!pagevec)
-                       break;
-
-               if (uio) {
-                       down_read(&current->mm->mmap_sem);
-                       result = get_user_pages(current, current->mm, user_addr,
-                                               npages, 0, 0, pagevec, NULL);
-                       up_read(&current->mm->mmap_sem);
-                       if (result < 0)
-                               break;
-               } else {
-                       WARN_ON(npages != 1);
-                       result = get_kernel_page(user_addr, 0, pagevec);
-                       if (WARN_ON(result != 1))
-                               break;
-               }
-
-               if ((unsigned)result < npages) {
-                       bytes = result * PAGE_SIZE;
-                       if (bytes <= pgbase) {
-                               nfs_direct_release_pages(pagevec, result);
-                               break;
-                       }
-                       bytes -= pgbase;
-                       npages = result;
-               }
-
-               for (i = 0; i < npages; i++) {
-                       struct nfs_page *req;
-                       unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-                       req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
-                                                pgbase, req_len);
-                       if (IS_ERR(req)) {
-                               result = PTR_ERR(req);
-                               break;
-                       }
-                       nfs_lock_request(req);
-                       req->wb_index = pos >> PAGE_SHIFT;
-                       req->wb_offset = pos & ~PAGE_MASK;
-                       if (!nfs_pageio_add_request(desc, req)) {
-                               result = desc->pg_error;
-                               nfs_unlock_and_release_request(req);
-                               break;
-                       }
-                       pgbase = 0;
-                       bytes -= req_len;
-                       started += req_len;
-                       user_addr += req_len;
-                       pos += req_len;
-                       count -= req_len;
-                       dreq->bytes_left -= req_len;
-               }
-               /* The nfs_page now hold references to these pages */
-               nfs_direct_release_pages(pagevec, npages);
-       } while (count != 0 && result >= 0);
-
-       kfree(pagevec);
-
-       if (started)
-               return started;
-       return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
        struct nfs_direct_req *dreq = hdr->dreq;
@@ -956,16 +793,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
        .completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If iov_iter_get_pages_alloc() or nfs_create_request()
+ * fails, bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
-                                              const struct iovec *iov,
-                                              unsigned long nr_segs,
-                                              loff_t pos, bool uio)
+                                              struct iov_iter *iter,
+                                              loff_t pos)
 {
        struct nfs_pageio_descriptor desc;
        struct inode *inode = dreq->inode;
        ssize_t result = 0;
        size_t requested_bytes = 0;
-       unsigned long seg;
+       size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
        nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
                              &nfs_direct_write_completion_ops);
@@ -973,16 +821,49 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
        get_dreq(dreq);
        atomic_inc(&inode->i_dio_count);
 
-       NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
-       for (seg = 0; seg < nr_segs; seg++) {
-               const struct iovec *vec = &iov[seg];
-               result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+       NFS_I(inode)->write_io += iov_iter_count(iter);
+       while (iov_iter_count(iter)) {
+               struct page **pagevec;
+               size_t bytes;
+               size_t pgbase;
+               unsigned npages, i;
+
+               result = iov_iter_get_pages_alloc(iter, &pagevec,
+                                                 wsize, &pgbase);
                if (result < 0)
                        break;
-               requested_bytes += result;
-               if ((size_t)result < vec->iov_len)
+
+               bytes = result;
+               iov_iter_advance(iter, bytes);
+               npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+               for (i = 0; i < npages; i++) {
+                       struct nfs_page *req;
+                       unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+                       req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+                                                pgbase, req_len);
+                       if (IS_ERR(req)) {
+                               result = PTR_ERR(req);
+                               break;
+                       }
+                       nfs_lock_request(req);
+                       req->wb_index = pos >> PAGE_SHIFT;
+                       req->wb_offset = pos & ~PAGE_MASK;
+                       if (!nfs_pageio_add_request(&desc, req)) {
+                               result = desc.pg_error;
+                               nfs_unlock_and_release_request(req);
+                               break;
+                       }
+                       pgbase = 0;
+                       bytes -= req_len;
+                       requested_bytes += req_len;
+                       pos += req_len;
+                       dreq->bytes_left -= req_len;
+               }
+               nfs_direct_release_pages(pagevec, npages);
+               kvfree(pagevec);
+               if (result < 0)
                        break;
-               pos += vec->iov_len;
        }
        nfs_pageio_complete(&desc);
 
@@ -1004,8 +885,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
@@ -1023,8 +903,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+                               loff_t pos, bool uio)
 {
        ssize_t result = -EINVAL;
        struct file *file = iocb->ki_filp;
@@ -1033,9 +913,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        struct nfs_direct_req *dreq;
        struct nfs_lock_context *l_ctx;
        loff_t end;
-       size_t count;
-
-       count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(iter);
        end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 
        nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -1086,7 +964,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
 
-       result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+       result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
        if (mapping->nrpages) {
                invalidate_inode_pages2_range(mapping,
index c1edf7336315c3f8ddffe45261d814b8f1877771..4042ff58fe3f3d0b18d705774c3f6d975e642248 100644 (file)
@@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id)
 EXPORT_SYMBOL_GPL(nfs_file_flush);
 
 ssize_t
-nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t result;
 
        if (iocb->ki_filp->f_flags & O_DIRECT)
-               return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+               return nfs_file_direct_read(iocb, to, iocb->ki_pos, true);
 
-       dprintk("NFS: read(%pD2, %lu@%lu)\n",
+       dprintk("NFS: read(%pD2, %zu@%lu)\n",
                iocb->ki_filp,
-               (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+               iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
        result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
        if (!result) {
-               result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+               result = generic_file_read_iter(iocb, to);
                if (result > 0)
                        nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
        }
@@ -635,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
        return 0;
 }
 
-ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
-                      unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        unsigned long written = 0;
        ssize_t result;
-       size_t count = iov_length(iov, nr_segs);
+       size_t count = iov_iter_count(from);
+       loff_t pos = iocb->ki_pos;
 
        result = nfs_key_timeout_notify(file, inode);
        if (result)
                return result;
 
        if (file->f_flags & O_DIRECT)
-               return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+               return nfs_file_direct_write(iocb, from, pos, true);
 
-       dprintk("NFS: write(%pD2, %lu@%Ld)\n",
-               file, (unsigned long) count, (long long) pos);
+       dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+               file, count, (long long) pos);
 
        result = -EBUSY;
        if (IS_SWAPFILE(inode))
@@ -670,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
        if (!count)
                goto out;
 
-       result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       result = generic_file_write_iter(iocb, from);
        if (result > 0)
                written = result;
 
@@ -691,36 +690,6 @@ out_swapfile:
 }
 EXPORT_SYMBOL_GPL(nfs_file_write);
 
-ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
-                             struct file *filp, loff_t *ppos,
-                             size_t count, unsigned int flags)
-{
-       struct inode *inode = file_inode(filp);
-       unsigned long written = 0;
-       ssize_t ret;
-
-       dprintk("NFS splice_write(%pD2, %lu@%llu)\n",
-               filp, (unsigned long) count, (unsigned long long) *ppos);
-
-       /*
-        * The combination of splice and an O_APPEND destination is disallowed.
-        */
-
-       ret = generic_file_splice_write(pipe, filp, ppos, count, flags);
-       if (ret > 0)
-               written = ret;
-
-       if (ret >= 0 && nfs_need_sync_write(filp, inode)) {
-               int err = vfs_fsync(filp, 0);
-               if (err < 0)
-                       ret = err;
-       }
-       if (ret > 0)
-               nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_write);
-
 static int
 do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
@@ -935,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease);
 
 const struct file_operations nfs_file_operations = {
        .llseek         = nfs_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = nfs_file_read,
-       .aio_write      = nfs_file_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = nfs_file_read,
+       .write_iter     = nfs_file_write,
        .mmap           = nfs_file_mmap,
        .open           = nfs_file_open,
        .flush          = nfs_file_flush,
@@ -947,7 +916,7 @@ const struct file_operations nfs_file_operations = {
        .lock           = nfs_lock,
        .flock          = nfs_flock,
        .splice_read    = nfs_file_splice_read,
-       .splice_write   = nfs_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .check_flags    = nfs_check_flags,
        .setlease       = nfs_setlease,
 };
index 8b69cba1bb04d9b177bca18a2f95c7b0162b8cf1..82ddbf46660e3c1be7d499f2ca014ce619da8603 100644 (file)
@@ -327,16 +327,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
 int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 int nfs_file_flush(struct file *, fl_owner_t);
-ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
 ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
                             size_t, unsigned int);
 int nfs_file_mmap(struct file *, struct vm_area_struct *);
-ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
 int nfs_file_release(struct inode *, struct file *);
 int nfs_lock(struct file *, int, struct file_lock *);
 int nfs_flock(struct file *, int, struct file_lock *);
-ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
-                             size_t, unsigned int);
 int nfs_check_flags(int);
 int nfs_setlease(struct file *, long, struct file_lock **);
 
index 464db9dd63180dc7baf3695f51471747426144fb..a816f0627a6ce03cda2502c42c780c5ab6a2742c 100644 (file)
@@ -117,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 const struct file_operations nfs4_file_operations = {
        .llseek         = nfs_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = nfs_file_read,
-       .aio_write      = nfs_file_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = nfs_file_read,
+       .write_iter     = nfs_file_write,
        .mmap           = nfs_file_mmap,
        .open           = nfs4_file_open,
        .flush          = nfs_file_flush,
@@ -129,7 +129,7 @@ const struct file_operations nfs4_file_operations = {
        .lock           = nfs_lock,
        .flock          = nfs_flock,
        .splice_read    = nfs_file_splice_read,
-       .splice_write   = nfs_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .check_flags    = nfs_check_flags,
        .setlease       = nfs_setlease,
 };
index f3a82fbcae026357431720d998b5eb6e7eafee66..24978153c0c4daefd04f2853576db34aa3e8e0fe 100644 (file)
@@ -152,10 +152,10 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  */
 const struct file_operations nilfs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .unlocked_ioctl = nilfs_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = nilfs_compat_ioctl,
index b9c5726120e32acb70d20df47986433a6ca5f153..6252b173a46590225e2ba29f7e07cf13aa62eeac 100644 (file)
@@ -298,19 +298,20 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t offset, unsigned long nr_segs)
+nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t size;
 
        if (rw == WRITE)
                return 0;
 
        /* Needs synchronization with the cleaner */
-       size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+       size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
                                  nilfs_get_block);
 
        /*
@@ -319,7 +320,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
         */
        if (unlikely((rw & WRITE) && size < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if (end > isize)
                        nilfs_write_failed(mapping, end);
index 86ddab916b6607e3cab28c276359b8b98971a46c..5c9e2c81cb11db029ece7873766041ada8c65024 100644 (file)
@@ -2090,10 +2090,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
        size_t count;           /* after file limit checks */
        ssize_t written, err;
 
-       count = 0;
-       err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-       if (err)
-               return err;
+       count = iov_length(iov, nr_segs);
        pos = *ppos;
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = mapping->backing_dev_info;
@@ -2202,8 +2199,8 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 
 const struct file_operations ntfs_file_ops = {
        .llseek         = generic_file_llseek,   /* Seek inside file. */
-       .read           = do_sync_read,          /* Read from file. */
-       .aio_read       = generic_file_aio_read, /* Async read from file. */
+       .read           = new_sync_read,         /* Read from file. */
+       .read_iter      = generic_file_read_iter, /* Async read from file. */
 #ifdef NTFS_RW
        .write          = do_sync_write,         /* Write to file. */
        .aio_write      = ntfs_file_aio_write,   /* Async write to file. */
index d310d12a9adc481187c78fb65bffba53a16a75e7..4a231a166cf88d6f76495ab9420e7406ba10307a 100644 (file)
@@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
 
 static ssize_t ocfs2_direct_IO(int rw,
                               struct kiocb *iocb,
-                              const struct iovec *iov,
-                              loff_t offset,
-                              unsigned long nr_segs)
+                              struct iov_iter *iter,
+                              loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file)->i_mapping->host;
@@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
                return 0;
 
        return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-                                   iov, offset, nr_segs,
+                                   iter, offset,
                                    ocfs2_direct_IO_get_blocks,
                                    ocfs2_dio_end_io, NULL, 0);
 }
index 8eb6e5732d3b73b115abea0681cdd95d9195becf..2930e231f3f9fbda2807190726d6ceffffa2a6b5 100644 (file)
@@ -2233,16 +2233,13 @@ out:
        return ret;
 }
 
-static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
-                                   const struct iovec *iov,
-                                   unsigned long nr_segs,
-                                   loff_t pos)
+static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
+                                   struct iov_iter *from)
 {
        int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
        int can_do_direct, has_refcount = 0;
        ssize_t written = 0;
-       size_t ocount;          /* original count */
-       size_t count;           /* after file limit checks */
+       size_t count = iov_iter_count(from);
        loff_t old_size, *ppos = &iocb->ki_pos;
        u32 old_clusters;
        struct file *file = iocb->ki_filp;
@@ -2256,7 +2253,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
                (unsigned long long)OCFS2_I(inode)->ip_blkno,
                file->f_path.dentry->d_name.len,
                file->f_path.dentry->d_name.name,
-               (unsigned int)nr_segs);
+               (unsigned int)from->nr_segs);   /* XXX: iov_iter guts */
 
        if (iocb->ki_nbytes == 0)
                return 0;
@@ -2354,29 +2351,21 @@ relock:
        /* communicate with ocfs2_dio_end_io */
        ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-       ret = generic_segment_checks(iov, &nr_segs, &ocount,
-                                    VERIFY_READ);
-       if (ret)
-               goto out_dio;
-
-       count = ocount;
        ret = generic_write_checks(file, ppos, &count,
                                   S_ISBLK(inode->i_mode));
        if (ret)
                goto out_dio;
 
+       iov_iter_truncate(from, count);
        if (direct_io) {
-               written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
-                                                   count, ocount);
+               written = generic_file_direct_write(iocb, from, *ppos);
                if (written < 0) {
                        ret = written;
                        goto out_dio;
                }
        } else {
-               struct iov_iter from;
-               iov_iter_init(&from, iov, nr_segs, count, 0);
                current->backing_dev_info = file->f_mapping->backing_dev_info;
-               written = generic_perform_write(file, &from, *ppos);
+               written = generic_perform_write(file, from, *ppos);
                if (likely(written >= 0))
                        iocb->ki_pos = *ppos + written;
                current->backing_dev_info = NULL;
@@ -2441,84 +2430,6 @@ out_sems:
        return ret;
 }
 
-static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
-                               struct file *out,
-                               struct splice_desc *sd)
-{
-       int ret;
-
-       ret = ocfs2_prepare_inode_for_write(out, &sd->pos,
-                                           sd->total_len, 0, NULL, NULL);
-       if (ret < 0) {
-               mlog_errno(ret);
-               return ret;
-       }
-
-       return splice_from_pipe_feed(pipe, sd, pipe_to_file);
-}
-
-static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
-                                      struct file *out,
-                                      loff_t *ppos,
-                                      size_t len,
-                                      unsigned int flags)
-{
-       int ret;
-       struct address_space *mapping = out->f_mapping;
-       struct inode *inode = mapping->host;
-       struct splice_desc sd = {
-               .total_len = len,
-               .flags = flags,
-               .pos = *ppos,
-               .u.file = out,
-       };
-
-
-       trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
-                       (unsigned long long)OCFS2_I(inode)->ip_blkno,
-                       out->f_path.dentry->d_name.len,
-                       out->f_path.dentry->d_name.name, len);
-
-       pipe_lock(pipe);
-
-       splice_from_pipe_begin(&sd);
-       do {
-               ret = splice_from_pipe_next(pipe, &sd);
-               if (ret <= 0)
-                       break;
-
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-               ret = ocfs2_rw_lock(inode, 1);
-               if (ret < 0)
-                       mlog_errno(ret);
-               else {
-                       ret = ocfs2_splice_to_file(pipe, out, &sd);
-                       ocfs2_rw_unlock(inode, 1);
-               }
-               mutex_unlock(&inode->i_mutex);
-       } while (ret > 0);
-       splice_from_pipe_end(pipe, &sd);
-
-       pipe_unlock(pipe);
-
-       if (sd.num_spliced)
-               ret = sd.num_spliced;
-
-       if (ret > 0) {
-               int err;
-
-               err = generic_write_sync(out, *ppos, ret);
-               if (err)
-                       ret = err;
-               else
-                       *ppos += ret;
-
-               balance_dirty_pages_ratelimited(mapping);
-       }
-
-       return ret;
-}
-
 static ssize_t ocfs2_file_splice_read(struct file *in,
                                      loff_t *ppos,
                                      struct pipe_inode_info *pipe,
@@ -2534,7 +2445,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
                        in->f_path.dentry->d_name.name, len);
 
        /*
-        * See the comment in ocfs2_file_aio_read()
+        * See the comment in ocfs2_file_read_iter()
         */
        ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
        if (ret < 0) {
@@ -2549,10 +2460,8 @@ bail:
        return ret;
 }
 
-static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
-                                  const struct iovec *iov,
-                                  unsigned long nr_segs,
-                                  loff_t pos)
+static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
+                                  struct iov_iter *to)
 {
        int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
        struct file *filp = iocb->ki_filp;
@@ -2561,7 +2470,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
                        (unsigned long long)OCFS2_I(inode)->ip_blkno,
                        filp->f_path.dentry->d_name.len,
-                       filp->f_path.dentry->d_name.name, nr_segs);
+                       filp->f_path.dentry->d_name.name,
+                       to->nr_segs);   /* GRRRRR */
 
 
        if (!inode) {
@@ -2606,13 +2516,13 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        }
        ocfs2_inode_unlock(inode, lock_level);
 
-       ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
+       ret = generic_file_read_iter(iocb, to);
        trace_generic_file_aio_read_ret(ret);
 
        /* buffered aio wouldn't have proper lock coverage today */
        BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
 
-       /* see ocfs2_file_aio_write */
+       /* see ocfs2_file_write_iter */
        if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
                rw_level = -1;
                have_alloc_sem = 0;
@@ -2705,14 +2615,14 @@ const struct inode_operations ocfs2_special_file_iops = {
  */
 const struct file_operations ocfs2_fops = {
        .llseek         = ocfs2_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
        .open           = ocfs2_file_open,
-       .aio_read       = ocfs2_file_aio_read,
-       .aio_write      = ocfs2_file_aio_write,
+       .read_iter      = ocfs2_file_read_iter,
+       .write_iter     = ocfs2_file_write_iter,
        .unlocked_ioctl = ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ocfs2_compat_ioctl,
@@ -2720,7 +2630,7 @@ const struct file_operations ocfs2_fops = {
        .lock           = ocfs2_lock,
        .flock          = ocfs2_flock,
        .splice_read    = ocfs2_file_splice_read,
-       .splice_write   = ocfs2_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
 };
 
@@ -2753,21 +2663,21 @@ const struct file_operations ocfs2_dops = {
  */
 const struct file_operations ocfs2_fops_no_plocks = {
        .llseek         = ocfs2_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
        .open           = ocfs2_file_open,
-       .aio_read       = ocfs2_file_aio_read,
-       .aio_write      = ocfs2_file_aio_write,
+       .read_iter      = ocfs2_file_read_iter,
+       .write_iter     = ocfs2_file_write_iter,
        .unlocked_ioctl = ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ocfs2_compat_ioctl,
 #endif
        .flock          = ocfs2_flock,
        .splice_read    = ocfs2_file_splice_read,
-       .splice_write   = ocfs2_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
 };
 
index 54d57d6ba68dd5b91df6cbc9269e1ccf3c05a950..902e88527fcec443244bd12a9a25e1cf895ba763 100644 (file)
@@ -337,10 +337,10 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
 
 const struct file_operations omfs_file_operations = {
        .llseek = generic_file_llseek,
-       .read = do_sync_read,
-       .write = do_sync_write,
-       .aio_read = generic_file_aio_read,
-       .aio_write = generic_file_aio_write,
+       .read = new_sync_read,
+       .write = new_sync_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = generic_file_write_iter,
        .mmap = generic_file_mmap,
        .fsync = generic_file_fsync,
        .splice_read = generic_file_splice_read,
index 9d64679cec73b00fc4685e23d69374ca122fed09..36662d0362379698fcb5764fbb142337732b3919 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,6 +725,12 @@ static int do_dentry_open(struct file *f,
        }
        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(inode);
+       if ((f->f_mode & FMODE_READ) &&
+            likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
+               f->f_mode |= FMODE_CAN_READ;
+       if ((f->f_mode & FMODE_WRITE) &&
+            likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
+               f->f_mode |= FMODE_CAN_WRITE;
 
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
index 034bffac3f9724c6121f4635ba9740d61e106d06..21981e58e2a634c09b9ebb9b327860d849fb6b53 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe)
        pipe_lock(pipe);
 }
 
-static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
-                       int atomic)
-{
-       unsigned long copy;
-
-       while (len > 0) {
-               while (!iov->iov_len)
-                       iov++;
-               copy = min_t(unsigned long, len, iov->iov_len);
-
-               if (atomic) {
-                       if (__copy_from_user_inatomic(to, iov->iov_base, copy))
-                               return -EFAULT;
-               } else {
-                       if (copy_from_user(to, iov->iov_base, copy))
-                               return -EFAULT;
-               }
-               to += copy;
-               len -= copy;
-               iov->iov_base += copy;
-               iov->iov_len -= copy;
-       }
-       return 0;
-}
-
-/*
- * Pre-fault in the user memory, so we can use atomic copies.
- */
-static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
-{
-       while (!iov->iov_len)
-               iov++;
-
-       while (len > 0) {
-               unsigned long this_len;
-
-               this_len = min_t(unsigned long, len, iov->iov_len);
-               fault_in_pages_readable(iov->iov_base, this_len);
-               len -= this_len;
-               iov++;
-       }
-}
-
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                                  struct pipe_buffer *buf)
 {
@@ -271,24 +227,18 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = {
 };
 
 static ssize_t
-pipe_read(struct kiocb *iocb, const struct iovec *_iov,
-          unsigned long nr_segs, loff_t pos)
+pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
+       size_t total_len = iov_iter_count(to);
        struct file *filp = iocb->ki_filp;
        struct pipe_inode_info *pipe = filp->private_data;
        int do_wakeup;
        ssize_t ret;
-       struct iovec *iov = (struct iovec *)_iov;
-       size_t total_len;
-       struct iov_iter iter;
 
-       total_len = iov_length(iov, nr_segs);
        /* Null read succeeds. */
        if (unlikely(total_len == 0))
                return 0;
 
-       iov_iter_init(&iter, iov, nr_segs, total_len, 0);
-
        do_wakeup = 0;
        ret = 0;
        __pipe_lock(pipe);
@@ -312,7 +262,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                                break;
                        }
 
-                       written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
+                       written = copy_page_to_iter(buf->page, buf->offset, chars, to);
                        if (unlikely(written < chars)) {
                                if (!ret)
                                        ret = -EFAULT;
@@ -386,24 +336,19 @@ static inline int is_packetized(struct file *file)
 }
 
 static ssize_t
-pipe_write(struct kiocb *iocb, const struct iovec *_iov,
-           unsigned long nr_segs, loff_t ppos)
+pipe_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *filp = iocb->ki_filp;
        struct pipe_inode_info *pipe = filp->private_data;
-       ssize_t ret;
-       int do_wakeup;
-       struct iovec *iov = (struct iovec *)_iov;
-       size_t total_len;
+       ssize_t ret = 0;
+       int do_wakeup = 0;
+       size_t total_len = iov_iter_count(from);
        ssize_t chars;
 
-       total_len = iov_length(iov, nr_segs);
        /* Null write succeeds. */
        if (unlikely(total_len == 0))
                return 0;
 
-       do_wakeup = 0;
-       ret = 0;
        __pipe_lock(pipe);
 
        if (!pipe->readers) {
@@ -422,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
                int offset = buf->offset + buf->len;
 
                if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-                       int error, atomic = 1;
-                       void *addr;
-
-                       error = ops->confirm(pipe, buf);
+                       int error = ops->confirm(pipe, buf);
                        if (error)
                                goto out;
 
-                       iov_fault_in_pages_read(iov, chars);
-redo1:
-                       if (atomic)
-                               addr = kmap_atomic(buf->page);
-                       else
-                               addr = kmap(buf->page);
-                       error = pipe_iov_copy_from_user(offset + addr, iov,
-                                                       chars, atomic);
-                       if (atomic)
-                               kunmap_atomic(addr);
-                       else
-                               kunmap(buf->page);
-                       ret = error;
-                       do_wakeup = 1;
-                       if (error) {
-                               if (atomic) {
-                                       atomic = 0;
-                                       goto redo1;
-                               }
+                       ret = copy_page_from_iter(buf->page, offset, chars, from);
+                       if (unlikely(ret < chars)) {
+                               error = -EFAULT;
                                goto out;
                        }
+                       do_wakeup = 1;
                        buf->len += chars;
-                       total_len -= chars;
                        ret = chars;
-                       if (!total_len)
+                       if (!iov_iter_count(from))
                                goto out;
                }
        }
@@ -472,8 +398,7 @@ redo1:
                        int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
                        struct pipe_buffer *buf = pipe->bufs + newbuf;
                        struct page *page = pipe->tmp_page;
-                       char *src;
-                       int error, atomic = 1;
+                       int copied;
 
                        if (!page) {
                                page = alloc_page(GFP_HIGHUSER);
@@ -489,40 +414,19 @@ redo1:
                         * FIXME! Is this really true?
                         */
                        do_wakeup = 1;
-                       chars = PAGE_SIZE;
-                       if (chars > total_len)
-                               chars = total_len;
-
-                       iov_fault_in_pages_read(iov, chars);
-redo2:
-                       if (atomic)
-                               src = kmap_atomic(page);
-                       else
-                               src = kmap(page);
-
-                       error = pipe_iov_copy_from_user(src, iov, chars,
-                                                       atomic);
-                       if (atomic)
-                               kunmap_atomic(src);
-                       else
-                               kunmap(page);
-
-                       if (unlikely(error)) {
-                               if (atomic) {
-                                       atomic = 0;
-                                       goto redo2;
-                               }
+                       copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
+                       if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
                                if (!ret)
-                                       ret = error;
+                                       ret = -EFAULT;
                                break;
                        }
-                       ret += chars;
+                       ret += copied;
 
                        /* Insert it into the buffer array */
                        buf->page = page;
                        buf->ops = &anon_pipe_buf_ops;
                        buf->offset = 0;
-                       buf->len = chars;
+                       buf->len = copied;
                        buf->flags = 0;
                        if (is_packetized(filp)) {
                                buf->ops = &packet_pipe_buf_ops;
@@ -531,8 +435,7 @@ redo2:
                        pipe->nrbufs = ++bufs;
                        pipe->tmp_page = NULL;
 
-                       total_len -= chars;
-                       if (!total_len)
+                       if (!iov_iter_count(from))
                                break;
                }
                if (bufs < pipe->buffers)
@@ -1044,10 +947,10 @@ err:
 const struct file_operations pipefifo_fops = {
        .open           = fifo_open,
        .llseek         = no_llseek,
-       .read           = do_sync_read,
-       .aio_read       = pipe_read,
-       .write          = do_sync_write,
-       .aio_write      = pipe_write,
+       .read           = new_sync_read,
+       .read_iter      = pipe_read,
+       .write          = new_sync_write,
+       .write_iter     = pipe_write,
        .poll           = pipe_poll,
        .unlocked_ioctl = pipe_ioctl,
        .release        = pipe_release,
index 1e56a4e8cf7cd47d4886731ea063c95be9f078ee..4f56de822d2f5995b81006e0bc9783d321b1621d 100644 (file)
 #include "internal.h"
 
 const struct file_operations ramfs_file_operations = {
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .fsync          = noop_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .llseek         = generic_file_llseek,
 };
 
index 0b3d8e4cb2fa00dd8b906d1ddc811bc17a4c8e07..dda012ad4208d3192521c80a082af5ff038f5b3b 100644 (file)
@@ -37,13 +37,13 @@ static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 const struct file_operations ramfs_file_operations = {
        .mmap                   = ramfs_nommu_mmap,
        .get_unmapped_area      = ramfs_nommu_get_unmapped_area,
-       .read                   = do_sync_read,
-       .aio_read               = generic_file_aio_read,
-       .write                  = do_sync_write,
-       .aio_write              = generic_file_aio_write,
+       .read                   = new_sync_read,
+       .read_iter              = generic_file_read_iter,
+       .write                  = new_sync_write,
+       .write_iter             = generic_file_write_iter,
        .fsync                  = noop_fsync,
        .splice_read            = generic_file_splice_read,
-       .splice_write           = generic_file_splice_write,
+       .splice_write           = iter_file_splice_write,
        .llseek                 = generic_file_llseek,
 };
 
index 31c6efa431839e41f4b39b314cb640dd89b68ecc..009d8542a889c7d2b4b98334f0c4868d36076739 100644 (file)
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
                unsigned long, loff_t);
+typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
 
 const struct file_operations generic_ro_fops = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
        .mmap           = generic_file_readonly_mmap,
        .splice_read    = generic_file_splice_read,
 };
@@ -390,13 +391,34 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 EXPORT_SYMBOL(do_sync_read);
 
+ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
+{      /* read() helper for files providing ->read_iter(): wraps buf/len in a one-segment iov_iter */
+       struct iovec iov = { .iov_base = buf, .iov_len = len };
+       struct kiocb kiocb;
+       struct iov_iter iter;
+       ssize_t ret;
+       /* on-stack kiocb hands the starting position (*ppos) and byte count to the method */
+       init_sync_kiocb(&kiocb, filp);
+       kiocb.ki_pos = *ppos;
+       kiocb.ki_nbytes = len;
+       iov_iter_init(&iter, READ, &iov, 1, len);
+       /* no NULL check on ->read_iter here; the caller must only use this when it is set */
+       ret = filp->f_op->read_iter(&kiocb, &iter);
+       if (-EIOCBQUEUED == ret)        /* queued: take the result from wait_on_sync_kiocb() */
+               ret = wait_on_sync_kiocb(&kiocb);
+       *ppos = kiocb.ki_pos;   /* copy back whatever position the method left in the kiocb */
+       return ret;
+}
+
+EXPORT_SYMBOL(new_sync_read);
+
 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 {
        ssize_t ret;
 
        if (!(file->f_mode & FMODE_READ))
                return -EBADF;
-       if (!file->f_op->read && !file->f_op->aio_read)
+       if (!(file->f_mode & FMODE_CAN_READ))
                return -EINVAL;
        if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
                return -EFAULT;
@@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
                count = ret;
                if (file->f_op->read)
                        ret = file->f_op->read(file, buf, count, pos);
-               else
+               else if (file->f_op->aio_read)
                        ret = do_sync_read(file, buf, count, pos);
+               else
+                       ret = new_sync_read(file, buf, count, pos);
                if (ret > 0) {
                        fsnotify_access(file);
                        add_rchar(current, ret);
@@ -439,13 +463,34 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 EXPORT_SYMBOL(do_sync_write);
 
+ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
+{      /* write() helper for files providing ->write_iter(): wraps buf/len in a one-segment iov_iter */
+       struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };  /* cast drops const to fit iov_base */
+       struct kiocb kiocb;
+       struct iov_iter iter;
+       ssize_t ret;
+       /* on-stack kiocb hands the starting position (*ppos) and byte count to the method */
+       init_sync_kiocb(&kiocb, filp);
+       kiocb.ki_pos = *ppos;
+       kiocb.ki_nbytes = len;
+       iov_iter_init(&iter, WRITE, &iov, 1, len);
+       /* no NULL check on ->write_iter here; the caller must only use this when it is set */
+       ret = filp->f_op->write_iter(&kiocb, &iter);
+       if (-EIOCBQUEUED == ret)        /* queued: take the result from wait_on_sync_kiocb() */
+               ret = wait_on_sync_kiocb(&kiocb);
+       *ppos = kiocb.ki_pos;   /* copy back whatever position the method left in the kiocb */
+       return ret;
+}
+
+EXPORT_SYMBOL(new_sync_write);
+
 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 {
        mm_segment_t old_fs;
        const char __user *p;
        ssize_t ret;
 
-       if (!file->f_op->write && !file->f_op->aio_write)
+       if (!(file->f_mode & FMODE_CAN_WRITE))
                return -EINVAL;
 
        old_fs = get_fs();
@@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
                count =  MAX_RW_COUNT;
        if (file->f_op->write)
                ret = file->f_op->write(file, p, count, pos);
-       else
+       else if (file->f_op->aio_write)
                ret = do_sync_write(file, p, count, pos);
+       else
+               ret = new_sync_write(file, p, count, pos);
        set_fs(old_fs);
        if (ret > 0) {
                fsnotify_modify(file);
@@ -472,7 +519,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
-       if (!file->f_op->write && !file->f_op->aio_write)
+       if (!(file->f_mode & FMODE_CAN_WRITE))
                return -EINVAL;
        if (unlikely(!access_ok(VERIFY_READ, buf, count)))
                return -EFAULT;
@@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
                file_start_write(file);
                if (file->f_op->write)
                        ret = file->f_op->write(file, buf, count, pos);
-               else
+               else if (file->f_op->aio_write)
                        ret = do_sync_write(file, buf, count, pos);
+               else
+                       ret = new_sync_write(file, buf, count, pos);
                if (ret > 0) {
                        fsnotify_modify(file);
                        add_wchar(current, ret);
@@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);
 
+static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
+               unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+{      /* run fn (a ->read_iter/->write_iter style method) over the caller's iovec array */
+       struct kiocb kiocb;
+       struct iov_iter iter;
+       ssize_t ret;
+       /* on-stack kiocb hands the starting position (*ppos) and total byte count to fn */
+       init_sync_kiocb(&kiocb, filp);
+       kiocb.ki_pos = *ppos;
+       kiocb.ki_nbytes = len;
+       /* rw is passed through as the iterator direction; len is the total across nr_segs segments */
+       iov_iter_init(&iter, rw, iov, nr_segs, len);
+       ret = fn(&kiocb, &iter);
+       if (ret == -EIOCBQUEUED)        /* queued: take the result from wait_on_sync_kiocb() */
+               ret = wait_on_sync_kiocb(&kiocb);
+       *ppos = kiocb.ki_pos;   /* copy back whatever position fn left in the kiocb */
+       return ret;
+}
+
 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
                unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
@@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
        ssize_t ret;
        io_fn_t fn;
        iov_fn_t fnv;
+       iter_fn_t iter_fn;
 
        ret = rw_copy_check_uvector(type, uvector, nr_segs,
                                    ARRAY_SIZE(iovstack), iovstack, &iov);
@@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file,
        if (type == READ) {
                fn = file->f_op->read;
                fnv = file->f_op->aio_read;
+               iter_fn = file->f_op->read_iter;
        } else {
                fn = (io_fn_t)file->f_op->write;
                fnv = file->f_op->aio_write;
+               iter_fn = file->f_op->write_iter;
                file_start_write(file);
        }
 
-       if (fnv)
+       if (iter_fn)
+               ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+                                               pos, iter_fn);
+       else if (fnv)
                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
                                                pos, fnv);
        else
@@ -785,7 +859,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 {
        if (!(file->f_mode & FMODE_READ))
                return -EBADF;
-       if (!file->f_op->aio_read && !file->f_op->read)
+       if (!(file->f_mode & FMODE_CAN_READ))
                return -EINVAL;
 
        return do_readv_writev(READ, file, vec, vlen, pos);
@@ -798,7 +872,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 {
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
-       if (!file->f_op->aio_write && !file->f_op->write)
+       if (!(file->f_mode & FMODE_CAN_WRITE))
                return -EINVAL;
 
        return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        ssize_t ret;
        io_fn_t fn;
        iov_fn_t fnv;
+       iter_fn_t iter_fn;
 
        ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
                                               UIO_FASTIOV, iovstack, &iov);
@@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        if (type == READ) {
                fn = file->f_op->read;
                fnv = file->f_op->aio_read;
+               iter_fn = file->f_op->read_iter;
        } else {
                fn = (io_fn_t)file->f_op->write;
                fnv = file->f_op->aio_write;
+               iter_fn = file->f_op->write_iter;
                file_start_write(file);
        }
 
-       if (fnv)
+       if (iter_fn)
+               ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+                                               pos, iter_fn);
+       else if (fnv)
                ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
                                                pos, fnv);
        else
@@ -964,7 +1044,7 @@ static size_t compat_readv(struct file *file,
                goto out;
 
        ret = -EINVAL;
-       if (!file->f_op->aio_read && !file->f_op->read)
+       if (!(file->f_mode & FMODE_CAN_READ))
                goto out;
 
        ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1041,7 +1121,7 @@ static size_t compat_writev(struct file *file,
                goto out;
 
        ret = -EINVAL;
-       if (!file->f_op->aio_write && !file->f_op->write)
+       if (!(file->f_mode & FMODE_CAN_WRITE))
                goto out;
 
        ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
index 5f6c32c668b68816584f19c982c4b9a22ded751b..db9e80ba53a0db5abe4910fa128bab1e6a2ee6ad 100644 (file)
@@ -243,8 +243,8 @@ drop_write_lock:
 }
 
 const struct file_operations reiserfs_file_operations = {
-       .read = do_sync_read,
-       .write = do_sync_write,
+       .read = new_sync_read,
+       .write = new_sync_write,
        .unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = reiserfs_compat_ioctl,
@@ -253,10 +253,10 @@ const struct file_operations reiserfs_file_operations = {
        .open = reiserfs_file_open,
        .release = reiserfs_file_release,
        .fsync = reiserfs_sync_file,
-       .aio_read = generic_file_aio_read,
-       .aio_write = generic_file_aio_write,
+       .read_iter = generic_file_read_iter,
+       .write_iter = generic_file_write_iter,
        .splice_read = generic_file_splice_read,
-       .splice_write = generic_file_splice_write,
+       .splice_write = iter_file_splice_write,
        .llseek = generic_file_llseek,
 };
 
index e3ca04894919c4d0a38f2623676d7ffe1ce6aff3..63b2b0ec49e6afacd955abf9f172751768ee08ee 100644 (file)
@@ -3279,15 +3279,15 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
  * to do in this section of the code.
  */
 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
-                                 const struct iovec *iov, loff_t offset,
-                                 unsigned long nr_segs)
+                                 struct iov_iter *iter, loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                 reiserfs_get_blocks_direct_io);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+                                reiserfs_get_blocks_direct_io);
 
        /*
         * In case of error extending write may have instantiated a few
@@ -3295,7 +3295,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
         */
        if (unlikely((rw & WRITE) && ret < 0)) {
                loff_t isize = i_size_read(inode);
-               loff_t end = offset + iov_length(iov, nr_segs);
+               loff_t end = offset + count;
 
                if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
                        truncate_setsize(inode, isize);
index f373bde8f545da481ba0a7caa873271dd599b30d..ea06c7554860d7ada89db9ba0a8838e17b40d3fc 100644 (file)
@@ -72,8 +72,8 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma)
 
 const struct file_operations romfs_ro_fops = {
        .llseek                 = generic_file_llseek,
-       .read                   = do_sync_read,
-       .aio_read               = generic_file_aio_read,
+       .read                   = new_sync_read,
+       .read_iter              = generic_file_read_iter,
        .splice_read            = generic_file_splice_read,
        .mmap                   = romfs_mmap,
        .get_unmapped_area      = romfs_get_unmapped_area,
index e246954ea48cb486b1c8101e6f621364d6844535..f5cb9ba84510fe5632a62af0bbf3843a45eeba23 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
+#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -717,63 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
                                    sd->len, &pos, more);
 }
 
-/*
- * This is a little more tricky than the file -> pipe splicing. There are
- * basically three cases:
- *
- *     - Destination page already exists in the address space and there
- *       are users of it. For that case we have no other option that
- *       copying the data. Tough luck.
- *     - Destination page already exists in the address space, but there
- *       are no users of it. Make sure it's uptodate, then drop it. Fall
- *       through to last case.
- *     - Destination page does not exist, we can add the pipe page to
- *       the page cache and avoid the copy.
- *
- * If asked to move pages to the output file (SPLICE_F_MOVE is set in
- * sd->flags), we attempt to migrate pages from the pipe to the output
- * file address space page cache. This is possible if no one else has
- * the pipe page referenced outside of the pipe and page cache. If
- * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
- * a new page in the output file page cache and fill/dirty that.
- */
-int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-                struct splice_desc *sd)
-{
-       struct file *file = sd->u.file;
-       struct address_space *mapping = file->f_mapping;
-       unsigned int offset, this_len;
-       struct page *page;
-       void *fsdata;
-       int ret;
-
-       offset = sd->pos & ~PAGE_CACHE_MASK;
-
-       this_len = sd->len;
-       if (this_len + offset > PAGE_CACHE_SIZE)
-               this_len = PAGE_CACHE_SIZE - offset;
-
-       ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
-                               AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-       if (unlikely(ret))
-               goto out;
-
-       if (buf->page != page) {
-               char *src = kmap_atomic(buf->page);
-               char *dst = kmap_atomic(page);
-
-               memcpy(dst + offset, src + buf->offset, this_len);
-               flush_dcache_page(page);
-               kunmap_atomic(dst);
-               kunmap_atomic(src);
-       }
-       ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
-                               page, fsdata);
-out:
-       return ret;
-}
-EXPORT_SYMBOL(pipe_to_file);
-
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
        smp_mb();
@@ -802,7 +746,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
  *    locking is required around copying the pipe buffers to the
  *    destination.
  */
-int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
                          splice_actor *actor)
 {
        int ret;
@@ -849,7 +793,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 
        return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_feed);
 
 /**
  * splice_from_pipe_next - wait for some data to splice from
@@ -861,7 +804,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed);
  *    value (one) if pipe buffers are available.  It will return zero
  *    or -errno if no more data needs to be spliced.
  */
-int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
        while (!pipe->nrbufs) {
                if (!pipe->writers)
@@ -886,7 +829,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 
        return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_next);
 
 /**
  * splice_from_pipe_begin - start splicing from pipe
@@ -897,12 +839,11 @@ EXPORT_SYMBOL(splice_from_pipe_next);
  *    splice_from_pipe_next() and splice_from_pipe_feed() to
  *    initialize the necessary fields of @sd.
  */
-void splice_from_pipe_begin(struct splice_desc *sd)
+static void splice_from_pipe_begin(struct splice_desc *sd)
 {
        sd->num_spliced = 0;
        sd->need_wakeup = false;
 }
-EXPORT_SYMBOL(splice_from_pipe_begin);
 
 /**
  * splice_from_pipe_end - finish splicing from pipe
@@ -914,12 +855,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin);
  *    be called after a loop containing splice_from_pipe_next() and
  *    splice_from_pipe_feed().
  */
-void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
        if (sd->need_wakeup)
                wakeup_pipe_writers(pipe);
 }
-EXPORT_SYMBOL(splice_from_pipe_end);
 
 /**
  * __splice_from_pipe - splice data from a pipe to given actor
@@ -985,7 +925,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 }
 
 /**
- * generic_file_splice_write - splice data from a pipe to a file
+ * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:      pipe info
  * @out:       file to write to
  * @ppos:      position in @out
@@ -995,40 +935,122 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
  * Description:
  *    Will either move or copy pages (determined by @flags options) from
  *    the given pipe inode to the given file.
+ *    This one is ->write_iter-based.
  *
  */
 ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                          loff_t *ppos, size_t len, unsigned int flags)
 {
-       struct address_space *mapping = out->f_mapping;
-       struct inode *inode = mapping->host;
        struct splice_desc sd = {
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };
+       int nbufs = pipe->buffers;
+       struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+                                       GFP_KERNEL);
        ssize_t ret;
 
+       if (unlikely(!array))
+               return -ENOMEM;
+
        pipe_lock(pipe);
 
        splice_from_pipe_begin(&sd);
-       do {
+       while (sd.total_len) {
+               struct iov_iter from;
+               struct kiocb kiocb;
+               size_t left;
+               int n, idx;
+
                ret = splice_from_pipe_next(pipe, &sd);
                if (ret <= 0)
                        break;
 
-               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-               ret = file_remove_suid(out);
-               if (!ret) {
-                       ret = file_update_time(out);
-                       if (!ret)
-                               ret = splice_from_pipe_feed(pipe, &sd,
-                                                           pipe_to_file);
+               if (unlikely(nbufs < pipe->buffers)) {
+                       kfree(array);
+                       nbufs = pipe->buffers;
+                       array = kcalloc(nbufs, sizeof(struct bio_vec),
+                                       GFP_KERNEL);
+                       if (!array) {
+                               ret = -ENOMEM;
+                               break;
+                       }
                }
-               mutex_unlock(&inode->i_mutex);
-       } while (ret > 0);
+
+               /* build the vector */
+               left = sd.total_len;
+               for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+                       struct pipe_buffer *buf = pipe->bufs + idx;
+                       size_t this_len = buf->len;
+
+                       if (this_len > left)
+                               this_len = left;
+
+                       if (idx == pipe->buffers - 1)
+                               idx = -1;
+
+                       ret = buf->ops->confirm(pipe, buf);
+                       if (unlikely(ret)) {
+                               if (ret == -ENODATA)
+                                       ret = 0;
+                               goto done;
+                       }
+
+                       array[n].bv_page = buf->page;
+                       array[n].bv_len = this_len;
+                       array[n].bv_offset = buf->offset;
+                       left -= this_len;
+               }
+
+               /* ... iov_iter */
+               from.type = ITER_BVEC | WRITE;
+               from.bvec = array;
+               from.nr_segs = n;
+               from.count = sd.total_len - left;
+               from.iov_offset = 0;
+
+               /* ... and iocb */
+               init_sync_kiocb(&kiocb, out);
+               kiocb.ki_pos = sd.pos;
+               kiocb.ki_nbytes = sd.total_len - left;
+
+               /* now, send it */
+               ret = out->f_op->write_iter(&kiocb, &from);
+               if (-EIOCBQUEUED == ret)
+                       ret = wait_on_sync_kiocb(&kiocb);
+
+               if (ret <= 0)
+                       break;
+
+               sd.num_spliced += ret;
+               sd.total_len -= ret;
+               *ppos = sd.pos = kiocb.ki_pos;
+
+               /* dismiss the fully eaten buffers, adjust the partial one */
+               while (ret) {
+                       struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+                       if (ret >= buf->len) {
+                               const struct pipe_buf_operations *ops = buf->ops;
+                               ret -= buf->len;
+                               buf->len = 0;
+                               buf->ops = NULL;
+                               ops->release(pipe, buf);
+                               pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+                               pipe->nrbufs--;
+                               if (pipe->files)
+                                       sd.need_wakeup = true;
+                       } else {
+                               buf->offset += ret;
+                               buf->len -= ret;
+                               ret = 0;
+                       }
+               }
+       }
+done:
+       kfree(array);
        splice_from_pipe_end(pipe, &sd);
 
        pipe_unlock(pipe);
@@ -1036,21 +1058,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
        if (sd.num_spliced)
                ret = sd.num_spliced;
 
-       if (ret > 0) {
-               int err;
-
-               err = generic_write_sync(out, *ppos, ret);
-               if (err)
-                       ret = err;
-               else
-                       *ppos += ret;
-               balance_dirty_pages_ratelimited(mapping);
-       }
-
        return ret;
 }
 
-EXPORT_SYMBOL(generic_file_splice_write);
+EXPORT_SYMBOL(iter_file_splice_write);
 
 static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                          struct splice_desc *sd)
@@ -1549,7 +1560,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
                goto out;
 
        count = ret;
-       iov_iter_init(&iter, iov, nr_segs, count, 0);
+       iov_iter_init(&iter, READ, iov, nr_segs, count);
 
        sd.len = 0;
        sd.total_len = count;
index 9d4dc6831792a23270148c2d26b0423f656b505c..b00811c75b24f63acb1651991f6972f4b0b6ff54 100644 (file)
  */
 const struct file_operations sysv_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .fsync          = generic_file_fsync,
        .splice_read    = generic_file_splice_read,
index 0ab7f7dfb98b632818a9b1dde1e74f4799633b8b..b5b593c4527005ba50fe0745f2651095dba79331 100644 (file)
@@ -1364,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode,
 
 /**
  * update_ctime - update mtime and ctime of an inode.
- * @c: UBIFS file-system description object
  * @inode: inode to update
  *
  * This function updates mtime and ctime of the inode if it is not equivalent to
  * current time. Returns zero in case of success and a negative error code in
  * case of failure.
  */
-static int update_mctime(struct ubifs_info *c, struct inode *inode)
+static int update_mctime(struct inode *inode)
 {
        struct timespec now = ubifs_current_time(inode);
        struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
 
        if (mctime_update_needed(inode, &now)) {
                int err, release;
@@ -1397,18 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode)
        return 0;
 }
 
-static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, loff_t pos)
+static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-       int err;
-       struct inode *inode = iocb->ki_filp->f_mapping->host;
-       struct ubifs_info *c = inode->i_sb->s_fs_info;
-
-       err = update_mctime(c, inode);
+       int err = update_mctime(file_inode(iocb->ki_filp));
        if (err)
                return err;
 
-       return generic_file_aio_write(iocb, iov, nr_segs, pos);
+       return generic_file_write_iter(iocb, from);
 }
 
 static int ubifs_set_page_dirty(struct page *page)
@@ -1582,15 +1577,15 @@ const struct inode_operations ubifs_symlink_inode_operations = {
 
 const struct file_operations ubifs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = generic_file_aio_read,
-       .aio_write      = ubifs_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = generic_file_read_iter,
+       .write_iter     = ubifs_write_iter,
        .mmap           = ubifs_file_mmap,
        .fsync          = ubifs_fsync,
        .unlocked_ioctl = ubifs_ioctl,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ubifs_compat_ioctl,
 #endif
index d2c170f8b035a4b21ef6eac2274e74b137346d56..d80738fdf424cd61579a0544eab720bb8d7b0a64 100644 (file)
@@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file,
 }
 
 static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
-                                    const struct iovec *iov,
-                                    loff_t offset, unsigned long nr_segs)
+                                    struct iov_iter *iter,
+                                    loff_t offset)
 {
        /* Fallback to buffered I/O. */
        return 0;
@@ -134,8 +134,7 @@ const struct address_space_operations udf_adinicb_aops = {
        .direct_IO      = udf_adinicb_direct_IO,
 };
 
-static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                 unsigned long nr_segs, loff_t ppos)
+static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        ssize_t retval;
        struct file *file = iocb->ki_filp;
@@ -150,7 +149,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                if (file->f_flags & O_APPEND)
                        pos = inode->i_size;
                else
-                       pos = ppos;
+                       pos = iocb->ki_pos;
 
                if (inode->i_sb->s_blocksize <
                                (udf_file_entry_alloc_offset(inode) +
@@ -171,7 +170,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        } else
                up_write(&iinfo->i_data_sem);
 
-       retval = __generic_file_aio_write(iocb, iov, nr_segs);
+       retval = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);
 
        if (retval > 0) {
@@ -252,13 +251,13 @@ static int udf_release_file(struct inode *inode, struct file *filp)
 }
 
 const struct file_operations udf_file_operations = {
-       .read                   = do_sync_read,
-       .aio_read               = generic_file_aio_read,
+       .read                   = new_sync_read,
+       .read_iter              = generic_file_read_iter,
        .unlocked_ioctl         = udf_ioctl,
        .open                   = generic_file_open,
        .mmap                   = generic_file_mmap,
-       .write                  = do_sync_write,
-       .aio_write              = udf_file_aio_write,
+       .write                  = new_sync_write,
+       .write_iter             = udf_file_write_iter,
        .release                = udf_release_file,
        .fsync                  = generic_file_fsync,
        .splice_read            = generic_file_splice_read,
index 5d643706212f411a63b0804d4c896941aecb13d8..236cd48184c2df20e75bc9fee098ded782f2560f 100644 (file)
@@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
-                            const struct iovec *iov,
-                            loff_t offset, unsigned long nr_segs)
+                            struct iov_iter *iter,
+                            loff_t offset)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
+       size_t count = iov_iter_count(iter);
        ssize_t ret;
 
-       ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-                                 udf_get_block);
+       ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
        if (unlikely(ret < 0 && (rw & WRITE)))
-               udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
+               udf_write_failed(mapping, offset + count);
        return ret;
 }
 
index 33afa20d450982eafb4e1bcc77193cce152270d1..c84ec010a6761ff683e732973bd8780d989e6531 100644 (file)
  
 const struct file_operations ufs_file_operations = {
        .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .aio_read       = generic_file_aio_read,
-       .write          = do_sync_write,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .read_iter      = generic_file_read_iter,
+       .write          = new_sync_write,
+       .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .fsync          = generic_file_fsync,
index e32640eedea6430759310ab7b73ce909ab3bc445..faaf716e2080ad5d41cd86dd05c1ac8f4e3e2fad 100644 (file)
@@ -1486,9 +1486,8 @@ STATIC ssize_t
 xfs_vm_direct_IO(
        int                     rw,
        struct kiocb            *iocb,
-       const struct iovec      *iov,
-       loff_t                  offset,
-       unsigned long           nr_segs)
+       struct iov_iter         *iter,
+       loff_t                  offset)
 {
        struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
@@ -1496,7 +1495,7 @@ xfs_vm_direct_IO(
        ssize_t                 ret;
 
        if (rw & WRITE) {
-               size_t size = iov_length(iov, nr_segs);
+               size_t size = iov_iter_count(iter);
 
                /*
                 * We cannot preallocate a size update transaction here as we
@@ -1508,17 +1507,15 @@ xfs_vm_direct_IO(
                if (offset + size > XFS_I(inode)->i_d.di_size)
                        ioend->io_isdirect = 1;
 
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                                           offset, xfs_get_blocks_direct,
                                            xfs_end_io_direct_write, NULL,
                                            DIO_ASYNC_EXTEND);
                if (ret != -EIOCBQUEUED && iocb->private)
                        goto out_destroy_ioend;
        } else {
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                                           offset, xfs_get_blocks_direct,
                                            NULL, NULL, 0);
        }
 
index 1b8160dc04d120326de6bf39634073b9b7d7e98f..1f66779d7a46628cf3a068dd5c08b36368fb6545 100644 (file)
@@ -229,34 +229,27 @@ xfs_file_fsync(
 }
 
 STATIC ssize_t
-xfs_file_aio_read(
+xfs_file_read_iter(
        struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
+       struct iov_iter         *to)
 {
        struct file             *file = iocb->ki_filp;
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
-       size_t                  size = 0;
+       size_t                  size = iov_iter_count(to);
        ssize_t                 ret = 0;
        int                     ioflags = 0;
        xfs_fsize_t             n;
+       loff_t                  pos = iocb->ki_pos;
 
        XFS_STATS_INC(xs_read_calls);
 
-       BUG_ON(iocb->ki_pos != pos);
-
        if (unlikely(file->f_flags & O_DIRECT))
                ioflags |= IO_ISDIRECT;
        if (file->f_mode & FMODE_NOCMTIME)
                ioflags |= IO_INVIS;
 
-       ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
-       if (ret < 0)
-               return ret;
-
        if (unlikely(ioflags & IO_ISDIRECT)) {
                xfs_buftarg_t   *target =
                        XFS_IS_REALTIME_INODE(ip) ?
@@ -309,7 +302,7 @@ xfs_file_aio_read(
 
        trace_xfs_file_read(ip, size, pos, ioflags);
 
-       ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
+       ret = generic_file_read_iter(iocb, to);
        if (ret > 0)
                XFS_STATS_ADD(xs_read_bytes, ret);
 
@@ -349,47 +342,6 @@ xfs_file_splice_read(
        return ret;
 }
 
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-       struct pipe_inode_info  *pipe,
-       struct file             *outfilp,
-       loff_t                  *ppos,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct inode            *inode = outfilp->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       if (outfilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_write_bytes, ret);
-
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return ret;
-}
-
 /*
  * This routine is called to handle zeroing any space in the last block of the
  * file that is beyond the EOF.  We do this since the size is being increased
@@ -625,10 +577,7 @@ restart:
 STATIC ssize_t
 xfs_file_dio_aio_write(
        struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount)
+       struct iov_iter         *from)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
@@ -636,9 +585,10 @@ xfs_file_dio_aio_write(
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        ssize_t                 ret = 0;
-       size_t                  count = ocount;
        int                     unaligned_io = 0;
        int                     iolock;
+       size_t                  count = iov_iter_count(from);
+       loff_t                  pos = iocb->ki_pos;
        struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -677,6 +627,7 @@ xfs_file_dio_aio_write(
        ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
        if (ret)
                goto out;
+       iov_iter_truncate(from, count);
 
        if (mapping->nrpages) {
                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -698,8 +649,7 @@ xfs_file_dio_aio_write(
        }
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, count, ocount);
+       ret = generic_file_direct_write(iocb, from, pos);
 
 out:
        xfs_rw_iunlock(ip, iolock);
@@ -712,10 +662,7 @@ out:
 STATIC ssize_t
 xfs_file_buffered_aio_write(
        struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  count)
+       struct iov_iter         *from)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
@@ -724,7 +671,8 @@ xfs_file_buffered_aio_write(
        ssize_t                 ret;
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
-       struct iov_iter         from;
+       loff_t                  pos = iocb->ki_pos;
+       size_t                  count = iov_iter_count(from);
 
        xfs_rw_ilock(ip, iolock);
 
@@ -732,13 +680,13 @@ xfs_file_buffered_aio_write(
        if (ret)
                goto out;
 
-       iov_iter_init(&from, iovp, nr_segs, count, 0);
+       iov_iter_truncate(from, count);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 
 write_retry:
        trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_perform_write(file, &from, pos);
+       ret = generic_perform_write(file, from, pos);
        if (likely(ret >= 0))
                iocb->ki_pos = pos + ret;
        /*
@@ -759,40 +707,29 @@ out:
 }
 
 STATIC ssize_t
-xfs_file_aio_write(
+xfs_file_write_iter(
        struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
+       struct iov_iter         *from)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        struct inode            *inode = mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
        ssize_t                 ret;
-       size_t                  ocount = 0;
+       size_t                  ocount = iov_iter_count(from);
 
        XFS_STATS_INC(xs_write_calls);
 
-       BUG_ON(iocb->ki_pos != pos);
-
-       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-       if (ret)
-               return ret;
-
        if (ocount == 0)
                return 0;
 
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               ret = -EIO;
-               goto out;
-       }
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
 
        if (unlikely(file->f_flags & O_DIRECT))
-               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
+               ret = xfs_file_dio_aio_write(iocb, from);
        else
-               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-                                                 ocount);
+               ret = xfs_file_buffered_aio_write(iocb, from);
 
        if (ret > 0) {
                ssize_t err;
@@ -804,8 +741,6 @@ xfs_file_aio_write(
                if (err < 0)
                        ret = err;
        }
-
-out:
        return ret;
 }
 
@@ -1461,12 +1396,12 @@ xfs_file_llseek(
 
 const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = xfs_file_aio_read,
-       .aio_write      = xfs_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = xfs_file_read_iter,
+       .write_iter     = xfs_file_write_iter,
        .splice_read    = xfs_file_splice_read,
-       .splice_write   = xfs_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = xfs_file_compat_ioctl,
index 6910458915cfea9133cc3c39ff56e5f7f775c065..152f82782630222321bcd234b20c0ffb0a626e34 100644 (file)
@@ -1118,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
        TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
index d8e4cea23a257c1b9c8b1514493c407b13d912e0..66c2167f04a9d5788b68e97f446757bad082d780 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef __LINUX_BLK_TYPES_H
 #define __LINUX_BLK_TYPES_H
 
-#ifdef CONFIG_BLOCK
-
 #include <linux/types.h>
 
 struct bio_set;
@@ -28,6 +26,8 @@ struct bio_vec {
        unsigned int    bv_offset;
 };
 
+#ifdef CONFIG_BLOCK
+
 struct bvec_iter {
        sector_t                bi_sector;      /* device address in 512 byte
                                                   sectors */
index 2f49aa4c4f7f1d2feb361422e2b5f024c55a9519..279b0afac1c112e7eee89dddce0847ddcff2d5a2 100644 (file)
@@ -222,8 +222,6 @@ extern void ceph_copy_to_page_vector(struct page **pages,
 extern void ceph_copy_from_page_vector(struct page **pages,
                                    void *data,
                                    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
-                                   loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
 
index c3f46e499dd0027eed7f2bd0a0bc3cd465c3ac44..338e6f758c6d922be7d8163361da051efa0e3cbc 100644 (file)
@@ -128,6 +128,10 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_ATOMIC_POS       ((__force fmode_t)0x8000)
 /* Write access to underlying fs */
 #define FMODE_WRITER           ((__force fmode_t)0x10000)
+/* Has read method(s) */
+#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
+/* Has write method(s) */
+#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY         ((__force fmode_t)0x1000000)
@@ -343,8 +347,7 @@ struct address_space_operations {
        void (*invalidatepage) (struct page *, unsigned int, unsigned int);
        int (*releasepage) (struct page *, gfp_t);
        void (*freepage)(struct page *);
-       ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-                       loff_t offset, unsigned long nr_segs);
+       ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
        int (*get_xip_mem)(struct address_space *, pgoff_t, int,
                                                void **, unsigned long *);
        /*
@@ -1448,6 +1451,8 @@ struct block_device_operations;
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+struct iov_iter;
+
 struct file_operations {
        struct module *owner;
        loff_t (*llseek) (struct file *, loff_t, int);
@@ -1455,6 +1460,8 @@ struct file_operations {
        ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
        ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
        ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+       ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+       ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iterate) (struct file *, struct dir_context *);
        unsigned int (*poll) (struct file *, struct poll_table_struct *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -2404,20 +2411,18 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
                unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
-extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-               unsigned long *, loff_t, size_t, size_t);
+extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern int generic_segment_checks(const struct iovec *iov,
-               unsigned long *nr_segs, size_t *count, int access_flags);
+extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
-extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t pos);
+extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
                        int datasync);
 extern void block_sync_page(struct page *page);
@@ -2427,7 +2432,7 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t default_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
                struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
                struct file *out, loff_t *, size_t len, unsigned int flags);
@@ -2477,16 +2482,16 @@ enum {
 void dio_end_io(struct bio *bio, int error);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-       struct block_device *bdev, const struct iovec *iov, loff_t offset,
-       unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+       struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+       get_block_t get_block, dio_iodone_t end_io,
        dio_submit_t submit_io, int flags);
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
-               struct inode *inode, const struct iovec *iov, loff_t offset,
-               unsigned long nr_segs, get_block_t get_block)
+               struct inode *inode, struct iov_iter *iter, loff_t offset,
+               get_block_t get_block)
 {
-       return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-                                   offset, nr_segs, get_block, NULL, NULL,
+       return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter,
+                                   offset, get_block, NULL, NULL,
                                    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 #endif
index 919576b8e2cfd612d5a2b852f1aa9674811585d1..e30f6059ecd642b44c0cc599344c0421b713958f 100644 (file)
@@ -459,13 +459,12 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
 /*
  * linux/fs/nfs/direct.c
  */
-extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
-                       unsigned long);
+extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-                       const struct iovec *iov, unsigned long nr_segs,
+                       struct iov_iter *iter,
                        loff_t pos, bool uio);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-                       const struct iovec *iov, unsigned long nr_segs,
+                       struct iov_iter *iter,
                        loff_t pos, bool uio);
 
 /*
index 0e43906d2fda6dc68cffc6343594178465d6e461..da2751d3b93db503b5572b27e97a0e7cc516fc91 100644 (file)
@@ -70,16 +70,6 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
                                splice_actor *);
 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
                                  struct splice_desc *, splice_actor *);
-extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
-                                splice_actor *);
-extern int splice_from_pipe_next(struct pipe_inode_info *,
-                                struct splice_desc *);
-extern void splice_from_pipe_begin(struct splice_desc *);
-extern void splice_from_pipe_end(struct pipe_inode_info *,
-                                struct splice_desc *);
-extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
-                       struct splice_desc *);
-
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
                              struct splice_pipe_desc *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
index 199bcc34241ba0155a367f11d05edf5d9c138a02..e2231e47cec131f8de84459568b11aa6d294f62d 100644 (file)
@@ -19,11 +19,21 @@ struct kvec {
        size_t iov_len;
 };
 
+enum {
+       ITER_IOVEC = 0,
+       ITER_KVEC = 2,
+       ITER_BVEC = 4,
+};
+
 struct iov_iter {
-       const struct iovec *iov;
-       unsigned long nr_segs;
+       int type;
        size_t iov_offset;
        size_t count;
+       union {
+               const struct iovec *iov;
+               const struct bio_vec *bvec;
+       };
+       unsigned long nr_segs;
 };
 
 /*
@@ -53,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)                         \
+       if (!((start).type & ITER_BVEC))                        \
        for (iter = (start);                                    \
             (iter).count &&                                    \
             ((iov = iov_iter_iovec(&(iter))), 1);              \
@@ -62,32 +73,44 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
 
 size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes);
-size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i);
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i);
+unsigned long iov_iter_alignment(const struct iov_iter *i);
+void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+                       unsigned long nr_segs, size_t count);
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+                       size_t maxsize, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+                       size_t maxsize, size_t *start);
+int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
-static inline void iov_iter_init(struct iov_iter *i,
-                       const struct iovec *iov, unsigned long nr_segs,
-                       size_t count, size_t written)
+static inline size_t iov_iter_count(struct iov_iter *i)
 {
-       i->iov = iov;
-       i->nr_segs = nr_segs;
-       i->iov_offset = 0;
-       i->count = count + written;
+       return i->count;
+}
 
-       iov_iter_advance(i, written);
+static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+{
+       if (i->count > count)
+               i->count = count;
 }
 
-static inline size_t iov_iter_count(struct iov_iter *i)
+/*
+ * Reexpand a previously truncated iterator: set the remaining byte count to
+ * @count; it must not reach past the pre-truncation end of the iterator.
+ */
+static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
-       return i->count;
+       i->count = count;
 }
 
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
+
 #endif
index 7fadf1c6283844f07727a68fe12ce5f554f2fff6..dafb06f70a09dd97b1fa690969a638f596714091 100644 (file)
@@ -1665,96 +1665,42 @@ out:
        return written ? written : error;
 }
 
-/*
- * Performs necessary checks before doing a write
- * @iov:       io vector request
- * @nr_segs:   number of segments in the iovec
- * @count:     number of bytes to write
- * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
- *
- * Adjust number of segments and amount of bytes to write (nr_segs should be
- * properly initialized first). Returns appropriate error code that caller
- * should return or zero in case that write should be allowed.
- */
-int generic_segment_checks(const struct iovec *iov,
-                       unsigned long *nr_segs, size_t *count, int access_flags)
-{
-       unsigned long   seg;
-       size_t cnt = 0;
-       for (seg = 0; seg < *nr_segs; seg++) {
-               const struct iovec *iv = &iov[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               cnt += iv->iov_len;
-               if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
-                       return -EINVAL;
-               if (access_ok(access_flags, iv->iov_base, iv->iov_len))
-                       continue;
-               if (seg == 0)
-                       return -EFAULT;
-               *nr_segs = seg;
-               cnt -= iv->iov_len;     /* This segment is no good */
-               break;
-       }
-       *count = cnt;
-       return 0;
-}
-EXPORT_SYMBOL(generic_segment_checks);
-
 /**
- * generic_file_aio_read - generic filesystem read routine
+ * generic_file_read_iter - generic filesystem read routine
  * @iocb:      kernel I/O control block
- * @iov:       io vector request
- * @nr_segs:   number of segments in the iovec
- * @pos:       current file position
+ * @iter:      destination for the data read
  *
- * This is the "read()" routine for all filesystems
+ * This is the "read_iter()" routine for all filesystems
  * that can use the page cache directly.
  */
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-       struct file *filp = iocb->ki_filp;
-       ssize_t retval;
-       size_t count;
+       struct file *file = iocb->ki_filp;
+       ssize_t retval = 0;
        loff_t *ppos = &iocb->ki_pos;
-       struct iov_iter i;
-
-       count = 0;
-       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (retval)
-               return retval;
-       iov_iter_init(&i, iov, nr_segs, count, 0);
+       loff_t pos = *ppos;
 
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
-       if (filp->f_flags & O_DIRECT) {
+       if (file->f_flags & O_DIRECT) {
+               struct address_space *mapping = file->f_mapping;
+               struct inode *inode = mapping->host;
+               size_t count = iov_iter_count(iter);
                loff_t size;
-               struct address_space *mapping;
-               struct inode *inode;
 
-               mapping = filp->f_mapping;
-               inode = mapping->host;
                if (!count)
                        goto out; /* skip atime */
                size = i_size_read(inode);
                retval = filemap_write_and_wait_range(mapping, pos,
-                                       pos + iov_length(iov, nr_segs) - 1);
+                                       pos + count - 1);
                if (!retval) {
-                       retval = mapping->a_ops->direct_IO(READ, iocb,
-                                                          iov, pos, nr_segs);
+                       struct iov_iter data = *iter;
+                       retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
                }
+
                if (retval > 0) {
                        *ppos = pos + retval;
-                       count -= retval;
-                       /*
-                        * If we did a short DIO read we need to skip the
-                        * section of the iov that we've already read data into.
-                        */
-                       iov_iter_advance(&i, retval);
+                       iov_iter_advance(iter, retval);
                }
 
                /*
@@ -1765,17 +1711,17 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                 * and return.  Otherwise fallthrough to buffered io for
                 * the rest of the read.
                 */
-               if (retval < 0 || !count || *ppos >= size) {
-                       file_accessed(filp);
+               if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+                       file_accessed(file);
                        goto out;
                }
        }
 
-       retval = do_generic_file_read(filp, ppos, &i, retval);
+       retval = do_generic_file_read(file, ppos, iter, retval);
 out:
        return retval;
 }
-EXPORT_SYMBOL(generic_file_aio_read);
+EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
 /**
@@ -2386,9 +2332,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long *nr_segs, loff_t pos,
-               size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 {
        struct file     *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
@@ -2396,11 +2340,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
        ssize_t         written;
        size_t          write_len;
        pgoff_t         end;
+       struct iov_iter data;
 
-       if (count != ocount)
-               *nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
-
-       write_len = iov_length(iov, *nr_segs);
+       write_len = iov_iter_count(from);
        end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
        written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2427,7 +2369,8 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
                }
        }
 
-       written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+       data = *from;
+       written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
 
        /*
         * Finally, try again to invalidate clean pages which might have been
@@ -2444,6 +2387,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
        if (written > 0) {
                pos += written;
+               iov_iter_advance(from, written);
                if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
                        i_size_write(inode, pos);
                        mark_inode_dirty(inode);
@@ -2568,10 +2512,9 @@ again:
 EXPORT_SYMBOL(generic_perform_write);
 
 /**
- * __generic_file_aio_write - write data to a file
+ * __generic_file_write_iter - write data to a file
  * @iocb:      IO state structure (file, offset, etc.)
- * @iov:       vector with data to write
- * @nr_segs:   number of segments in the vector
+ * @from:      iov_iter with data to write
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -2585,26 +2528,16 @@ EXPORT_SYMBOL(generic_perform_write);
  * A caller has to handle it. This is mainly due to the fact that we want to
  * avoid syncing under i_mutex.
  */
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                unsigned long nr_segs)
+ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct address_space * mapping = file->f_mapping;
-       size_t ocount;          /* original count */
-       size_t count;           /* after file limit checks */
        struct inode    *inode = mapping->host;
        loff_t          pos = iocb->ki_pos;
        ssize_t         written = 0;
        ssize_t         err;
        ssize_t         status;
-       struct iov_iter from;
-
-       ocount = 0;
-       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-       if (err)
-               return err;
-
-       count = ocount;
+       size_t          count = iov_iter_count(from);
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
@@ -2615,6 +2548,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (count == 0)
                goto out;
 
+       iov_iter_truncate(from, count);
+
        err = file_remove_suid(file);
        if (err)
                goto out;
@@ -2623,17 +2558,13 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (err)
                goto out;
 
-       iov_iter_init(&from, iov, nr_segs, count, 0);
-
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (unlikely(file->f_flags & O_DIRECT)) {
                loff_t endbyte;
 
-               written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
-                                                       count, ocount);
+               written = generic_file_direct_write(iocb, from, pos);
                if (written < 0 || written == count)
                        goto out;
-               iov_iter_advance(&from, written);
 
                /*
                 * direct-io write to a hole: fall through to buffered I/O
@@ -2642,7 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                pos += written;
                count -= written;
 
-               status = generic_perform_write(file, &from, pos);
+               status = generic_perform_write(file, from, pos);
                /*
                 * If generic_perform_write() returned a synchronous error
                 * then we want to return the number of bytes which were
@@ -2674,7 +2605,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                         */
                }
        } else {
-               written = generic_perform_write(file, &from, pos);
+               written = generic_perform_write(file, from, pos);
                if (likely(written >= 0))
                        iocb->ki_pos = pos + written;
        }
@@ -2682,30 +2613,25 @@ out:
        current->backing_dev_info = NULL;
        return written ? written : err;
 }
-EXPORT_SYMBOL(__generic_file_aio_write);
+EXPORT_SYMBOL(__generic_file_write_iter);
 
 /**
- * generic_file_aio_write - write data to a file
+ * generic_file_write_iter - write data to a file
  * @iocb:      IO state structure
- * @iov:       vector with data to write
- * @nr_segs:   number of segments in the vector
- * @pos:       position in file where to write
+ * @from:      iov_iter with data to write
  *
- * This is a wrapper around __generic_file_aio_write() to be used by most
+ * This is a wrapper around __generic_file_write_iter() to be used by most
  * filesystems. It takes care of syncing the file in case of O_SYNC file
  * and acquires i_mutex as needed.
  */
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos)
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        ssize_t ret;
 
-       BUG_ON(iocb->ki_pos != pos);
-
        mutex_lock(&inode->i_mutex);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs);
+       ret = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);
 
        if (ret > 0) {
@@ -2717,7 +2643,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        }
        return ret;
 }
-EXPORT_SYMBOL(generic_file_aio_write);
+EXPORT_SYMBOL(generic_file_write_iter);
 
 /**
  * try_to_release_page() - release old fs-specific metadata on a page
index 10e46cd721de5876d0016aab5969d0b2c6ebdeb3..7b5dbd1517b5594b05d5590cae29c3eb3a1dada2 100644 (file)
@@ -1,8 +1,10 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
 {
        size_t skip, copy, left, wanted;
@@ -72,13 +74,97 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
        }
        kunmap(page);
 done:
+       if (skip == iov->iov_len) {
+               iov++;
+               skip = 0;
+       }
+       i->count -= wanted - bytes;
+       i->nr_segs -= iov - i->iov;
+       i->iov = iov;
+       i->iov_offset = skip;
+       return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, left, wanted;
+       const struct iovec *iov;
+       char __user *buf;
+       void *kaddr, *to;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       iov = i->iov;
+       skip = i->iov_offset;
+       buf = iov->iov_base + skip;
+       copy = min(bytes, iov->iov_len - skip);
+
+       if (!fault_in_pages_readable(buf, copy)) {
+               kaddr = kmap_atomic(page);
+               to = kaddr + offset;
+
+               /* first chunk, usually the only one */
+               left = __copy_from_user_inatomic(to, buf, copy);
+               copy -= left;
+               skip += copy;
+               to += copy;
+               bytes -= copy;
+
+               while (unlikely(!left && bytes)) {
+                       iov++;
+                       buf = iov->iov_base;
+                       copy = min(bytes, iov->iov_len);
+                       left = __copy_from_user_inatomic(to, buf, copy);
+                       copy -= left;
+                       skip = copy;
+                       to += copy;
+                       bytes -= copy;
+               }
+               if (likely(!bytes)) {
+                       kunmap_atomic(kaddr);
+                       goto done;
+               }
+               offset = to - kaddr;
+               buf += copy;
+               kunmap_atomic(kaddr);
+               copy = min(bytes, iov->iov_len - skip);
+       }
+       /* Too bad - revert to non-atomic kmap */
+       kaddr = kmap(page);
+       to = kaddr + offset;
+       left = __copy_from_user(to, buf, copy);
+       copy -= left;
+       skip += copy;
+       to += copy;
+       bytes -= copy;
+       while (unlikely(!left && bytes)) {
+               iov++;
+               buf = iov->iov_base;
+               copy = min(bytes, iov->iov_len);
+               left = __copy_from_user(to, buf, copy);
+               copy -= left;
+               skip = copy;
+               to += copy;
+               bytes -= copy;
+       }
+       kunmap(page);
+done:
+       if (skip == iov->iov_len) {
+               iov++;
+               skip = 0;
+       }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_to_iter);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
                        const struct iovec *iov, size_t base, size_t bytes)
@@ -107,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
  * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t copy_from_user_atomic_iovec(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
 {
        char *kaddr;
@@ -127,36 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 
        return copied;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
-
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-       char *kaddr;
-       size_t copied;
-
-       kaddr = kmap(page);
-       if (likely(i->nr_segs == 1)) {
-               int left;
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               left = __copy_from_user(kaddr + offset, buf, bytes);
-               copied = bytes - left;
-       } else {
-               copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-                                               i->iov, i->iov_offset, bytes);
-       }
-       kunmap(page);
-       return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
 
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
+static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
        BUG_ON(i->count < bytes);
 
@@ -191,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
                i->nr_segs = nr_segs;
        }
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
@@ -204,21 +261,483 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-       char __user *buf = i->iov->iov_base + i->iov_offset;
-       bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-       return fault_in_pages_readable(buf, bytes);
+       /*
+        * Address-based iterator: prefault starting at the current position
+        * in the first segment, clamped to what is left of that segment.
+        */
+       if (!(i->type & ITER_BVEC)) {
+               char __user *buf = i->iov->iov_base + i->iov_offset;
+               bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+               return fault_in_pages_readable(buf, bytes);
+       }
+       /*
+        * ITER_BVEC segments are page/offset/length triples rather than
+        * addresses, so nothing is faulted in; report success.
+        */
+       return 0;
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
+/*
+ * OR together the start address and length of every piece of the iovec
+ * array that the remaining i->count bytes cover.  Callers can test the low
+ * bits of the result to check alignment.  Returns 0 for an empty iterator.
+ */
+static unsigned long alignment_iovec(const struct iov_iter *i)
+{
+       const struct iovec *seg = i->iov;
+       size_t remaining = i->count;
+       unsigned long bits;
+       size_t first;
+
+       if (remaining == 0)
+               return 0;
+
+       /* the first segment may already be partially consumed */
+       bits = (unsigned long)seg->iov_base + i->iov_offset;
+       first = seg->iov_len - i->iov_offset;
+       if (first >= remaining)
+               return bits | remaining;
+
+       bits |= first;
+       remaining -= first;
+
+       /* whole segments in the middle contribute base and full length */
+       for (seg++; remaining > seg->iov_len; seg++) {
+               bits |= (unsigned long)seg->iov_base | seg->iov_len;
+               remaining -= seg->iov_len;
+       }
+
+       /* the last segment contributes only the bytes still wanted */
+       return bits | (unsigned long)seg->iov_base | remaining;
+}
+
+/*
+ * Set up @i to walk @nr_segs iovec segments at @iov holding @count bytes in
+ * total, starting at offset 0 of the first segment.  @direction becomes the
+ * iterator type; ITER_KVEC is added when the caller runs with the address
+ * limit set to KERNEL_DS.
+ */
+void iov_iter_init(struct iov_iter *i, int direction,
+                       const struct iovec *iov, unsigned long nr_segs,
+                       size_t count)
+{
+       int type = direction;
+
+       /* It will get better.  Eventually... */
+       if (segment_eq(get_fs(), KERNEL_DS))
+               type |= ITER_KVEC;
+
+       i->type = type;
+       i->iov = iov;
+       i->nr_segs = nr_segs;
+       i->count = count;
+       i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_init);
+
+/*
+ * Pin the user pages behind the current position of the first iovec segment,
+ * covering at most @maxsize bytes (and no more than the segment or the
+ * iterator has left).  The page pointers are stored in @pages and the offset
+ * of the data within the first page in *@start.
+ *
+ * Returns the number of bytes covered by the pinned pages, or the negative
+ * value returned by get_user_pages_fast().
+ */
+static ssize_t get_pages_iovec(struct iov_iter *i,
+                  struct page **pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct iovec *seg = i->iov;
+       unsigned long uaddr = (unsigned long)seg->iov_base + i->iov_offset;
+       size_t span = seg->iov_len - i->iov_offset;
+       int nr_pages;
+       int pinned;
+
+       if (span > i->count)
+               span = i->count;
+       if (span > maxsize)
+               span = maxsize;
+
+       /* widen the span so that it starts on a page boundary */
+       *start = uaddr & (PAGE_SIZE - 1);
+       span += *start;
+       uaddr &= ~(PAGE_SIZE - 1);
+       nr_pages = (span + PAGE_SIZE - 1) / PAGE_SIZE;
+
+       /* pin for writing unless the iterator is a WRITE one */
+       pinned = get_user_pages_fast(uaddr, nr_pages,
+                                    (i->type & WRITE) != WRITE, pages);
+       if (unlikely(pinned < 0))
+               return pinned;
+
+       /* a short pin only covers whole pages */
+       if (pinned == nr_pages)
+               return span - *start;
+       return pinned * PAGE_SIZE - *start;
+}
+
+/*
+ * Like get_pages_iovec(), but the array of page pointers is allocated here
+ * (kmalloc, falling back to vmalloc) and handed back through *@pages.  On
+ * failure the array is released and *@pages is left untouched.
+ *
+ * Returns the number of bytes covered by the pinned pages, -ENOMEM if the
+ * array cannot be allocated, or the negative value returned by
+ * get_user_pages_fast().
+ */
+static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct iovec *seg = i->iov;
+       unsigned long uaddr = (unsigned long)seg->iov_base + i->iov_offset;
+       size_t span = seg->iov_len - i->iov_offset;
+       struct page **vec;
+       int nr_pages;
+       int pinned;
+
+       if (span > i->count)
+               span = i->count;
+       if (span > maxsize)
+               span = maxsize;
+
+       /* widen the span so that it starts on a page boundary */
+       *start = uaddr & (PAGE_SIZE - 1);
+       span += *start;
+       uaddr &= ~(PAGE_SIZE - 1);
+       nr_pages = (span + PAGE_SIZE - 1) / PAGE_SIZE;
+
+       vec = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+       if (!vec)
+               vec = vmalloc(nr_pages * sizeof(struct page *));
+       if (!vec)
+               return -ENOMEM;
+
+       /* pin for writing unless the iterator is a WRITE one */
+       pinned = get_user_pages_fast(uaddr, nr_pages,
+                                    (i->type & WRITE) != WRITE, vec);
+       if (unlikely(pinned < 0)) {
+               kvfree(vec);
+               return pinned;
+       }
+
+       *pages = vec;
+       /* a short pin only covers whole pages */
+       if (pinned == nr_pages)
+               return span - *start;
+       return pinned * PAGE_SIZE - *start;
+}
+
+/*
+ * Count the pages touched by the remaining i->count bytes of an iovec-backed
+ * iterator, segment by segment.  The result is capped at @maxpages and the
+ * walk stops as soon as the cap is reached.
+ */
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
+{
+       const struct iovec *seg = i->iov;
+       size_t skip = i->iov_offset;
+       size_t remaining = i->count;
+       int total = 0;
+       int idx;
+
+       for (idx = 0; remaining && idx < i->nr_segs; idx++, seg++) {
+               unsigned long addr = (unsigned long)seg->iov_base + skip;
+               size_t len = seg->iov_len - skip;
+               unsigned long first_page, end_page;
+
+               /* only the first segment can be partially consumed */
+               skip = 0;
+               if (unlikely(!len))     /* empty segment */
+                       continue;
+               if (len > remaining)
+                       len = remaining;
+
+               first_page = addr / PAGE_SIZE;
+               end_page = (addr + len + PAGE_SIZE - 1) / PAGE_SIZE;
+               total += end_page - first_page;
+               if (total >= maxpages)  /* don't bother going further */
+                       return maxpages;
+               remaining -= len;
+       }
+       return min(total, maxpages);
+}
+
+/* Copy @len bytes starting @offset bytes into @page out to the buffer @to. */
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+       char *mapped;
+
+       /* the atomic mapping lives only for the duration of the copy */
+       mapped = kmap_atomic(page);
+       memcpy(to, mapped + offset, len);
+       kunmap_atomic(mapped);
+}
+
+/* Copy @len bytes from the buffer @from into @page, @offset bytes in. */
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+       char *mapped;
+
+       /* the atomic mapping lives only for the duration of the copy */
+       mapped = kmap_atomic(page);
+       memcpy(mapped + offset, from, len);
+       kunmap_atomic(mapped);
+}
+
+/*
+ * Copy up to @bytes from @page, starting at @offset, into the pages described
+ * by a bio_vec-backed iterator, and advance the iterator past what was
+ * copied.  The request is clamped to i->count.
+ *
+ * Returns the number of bytes copied (0 if there was nothing to copy).
+ */
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, wanted;
+       const struct bio_vec *bvec;
+       void *kaddr, *from;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       bvec = i->bvec;
+       skip = i->iov_offset;
+       /* first segment: only what is left after the part already consumed */
+       copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+       /* the source page stays mapped while the destinations are mapped in turn */
+       kaddr = kmap_atomic(page);
+       from = kaddr + offset;
+       memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
+       skip += copy;
+       from += copy;
+       bytes -= copy;
+       /* whatever is left spills into the following segments, from their start */
+       while (bytes) {
+               bvec++;
+               copy = min(bytes, (size_t)bvec->bv_len);
+               memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
+               skip = copy;
+               from += copy;
+               bytes -= copy;
+       }
+       kunmap_atomic(kaddr);
+       /* current segment fully used: position at the start of the next one */
+       if (skip == bvec->bv_len) {
+               bvec++;
+               skip = 0;
+       }
+       /* bytes is 0 here, so wanted - bytes is the whole clamped request */
+       i->count -= wanted - bytes;
+       i->nr_segs -= bvec - i->bvec;
+       i->bvec = bvec;
+       i->iov_offset = skip;
+       return wanted - bytes;
+}
+
+/*
+ * Copy up to @bytes from the pages described by a bio_vec-backed iterator
+ * into @page at @offset, and advance the iterator past what was copied.  The
+ * request is clamped to i->count.
+ *
+ * Returns the number of bytes copied (0 if there was nothing to copy).
+ */
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, wanted;
+       const struct bio_vec *bvec;
+       void *kaddr, *to;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       bvec = i->bvec;
+       skip = i->iov_offset;
+
+       /* the destination page stays mapped while the sources are mapped in turn */
+       kaddr = kmap_atomic(page);
+
+       to = kaddr + offset;
+
+       /* first segment: only what is left after the part already consumed */
+       copy = min(bytes, bvec->bv_len - skip);
+
+       memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
+
+       to += copy;
+       skip += copy;
+       bytes -= copy;
+
+       /* whatever is left comes from the following segments, from their start */
+       while (bytes) {
+               bvec++;
+               copy = min(bytes, (size_t)bvec->bv_len);
+               memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
+               skip = copy;
+               to += copy;
+               bytes -= copy;
+       }
+       kunmap_atomic(kaddr);
+       /* current segment fully used: position at the start of the next one */
+       if (skip == bvec->bv_len) {
+               bvec++;
+               skip = 0;
+       }
+       i->count -= wanted;
+       i->nr_segs -= bvec - i->bvec;
+       i->bvec = bvec;
+       i->iov_offset = skip;
+       return wanted;
+}
+
+/*
+ * Copy @bytes from the current position of a bio_vec-backed iterator into
+ * @page at @offset.  The iterator itself is not modified.  Always returns
+ * @bytes.
+ */
+static size_t copy_from_user_bvec(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       const struct bio_vec *seg = i->bvec;
+       size_t skip = i->iov_offset;
+       size_t remaining = bytes;
+       char *dst;
+
+       dst = kmap_atomic(page);
+       while (remaining) {
+               /* zero-length segments are stepped over */
+               if (seg->bv_len) {
+                       size_t chunk = min(remaining, seg->bv_len - skip);
+
+                       memcpy_from_page(dst + offset, seg->bv_page,
+                                        seg->bv_offset + skip, chunk);
+                       offset += chunk;
+                       remaining -= chunk;
+               }
+               /* every segment after the first is read from its start */
+               seg++;
+               skip = 0;
+       }
+       kunmap_atomic(dst);
+
+       return bytes;
+}
+
+/*
+ * Move a bio_vec-backed iterator forward by @bytes, updating i->count,
+ * i->iov_offset and, when segment boundaries are crossed, i->bvec and
+ * i->nr_segs.  It is a bug to advance past i->count.
+ */
+static void advance_bvec(struct iov_iter *i, size_t bytes)
+{
+       BUG_ON(i->count < bytes);
+
+       /* single segment: only the offset and the count need adjusting */
+       if (likely(i->nr_segs == 1)) {
+               i->iov_offset += bytes;
+               i->count -= bytes;
+       } else {
+               const struct bio_vec *bvec = i->bvec;
+               size_t base = i->iov_offset;
+               unsigned long nr_segs = i->nr_segs;
+
+               /*
+                * The !bvec->bv_len check ensures we skip over unlikely
+                * zero-length segments (without overrunning the bio_vec
+                * array).
+                */
+               while (bytes || unlikely(i->count && !bvec->bv_len)) {
+                       int copy;
+
+                       copy = min(bytes, bvec->bv_len - base);
+                       BUG_ON(!i->count || i->count < copy);
+                       i->count -= copy;
+                       bytes -= copy;
+                       base += copy;
+                       /* segment exhausted: continue at the start of the next one */
+                       if (bvec->bv_len == base) {
+                               bvec++;
+                               nr_segs--;
+                               base = 0;
+                       }
+               }
+               i->bvec = bvec;
+               i->iov_offset = base;
+               i->nr_segs = nr_segs;
+       }
+}
+
+/*
+ * OR together the in-page offset and length of every piece of the bio_vec
+ * array that the remaining i->count bytes cover.  Callers can test the low
+ * bits of the result to check alignment.  Returns 0 for an empty iterator.
+ */
+static unsigned long alignment_bvec(const struct iov_iter *i)
+{
+       const struct bio_vec *seg = i->bvec;
+       size_t remaining = i->count;
+       unsigned long bits;
+       size_t first;
+
+       if (remaining == 0)
+               return 0;
+
+       /* the first segment may already be partially consumed */
+       bits = seg->bv_offset + i->iov_offset;
+       first = seg->bv_len - i->iov_offset;
+       if (first >= remaining)
+               return bits | remaining;
+
+       bits |= first;
+       remaining -= first;
+
+       /* whole segments in the middle contribute offset and full length */
+       for (seg++; remaining > seg->bv_len; seg++) {
+               bits |= seg->bv_offset | seg->bv_len;
+               remaining -= seg->bv_len;
+       }
+
+       /* the last segment contributes only the bytes still wanted */
+       return bits | seg->bv_offset | remaining;
+}
+
+/*
+ * Hand out the page behind the current bio_vec segment: take a reference on
+ * it, store it in @pages[0] and report the data offset within it in *@start.
+ *
+ * Returns the number of bytes available in that segment, limited by
+ * i->count and @maxsize.
+ */
+static ssize_t get_pages_bvec(struct iov_iter *i,
+                  struct page **pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct bio_vec *seg = i->bvec;
+       size_t avail = seg->bv_len - i->iov_offset;
+
+       if (avail > i->count)
+               avail = i->count;
+       if (avail > maxsize)
+               avail = maxsize;
+
+       *start = seg->bv_offset + i->iov_offset;
+
+       *pages = seg->bv_page;
+       get_page(*pages);
+
+       return avail;
+}
+
+/*
+ * Like get_pages_bvec(), but the one-entry page array is allocated here and
+ * handed back through *@pages.  *@start is set even if the allocation fails,
+ * in which case *@pages is NULL.
+ *
+ * Returns the number of bytes available in the current segment, limited by
+ * i->count and @maxsize, or -ENOMEM.
+ */
+static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       const struct bio_vec *seg = i->bvec;
+       size_t avail = seg->bv_len - i->iov_offset;
+       struct page **vec;
+
+       if (avail > i->count)
+               avail = i->count;
+       if (avail > maxsize)
+               avail = maxsize;
+
+       *start = seg->bv_offset + i->iov_offset;
+
+       vec = kmalloc(sizeof(struct page *), GFP_KERNEL);
+       *pages = vec;
+       if (!vec)
+               return -ENOMEM;
+
+       vec[0] = seg->bv_page;
+       get_page(vec[0]);
+
+       return avail;
+}
+
+/*
+ * Count the bio_vec segments needed to cover the remaining i->count bytes,
+ * one page per non-empty segment.  The result is capped at @maxpages and the
+ * walk stops as soon as the cap is reached.
+ */
+static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
+{
+       const struct bio_vec *seg = i->bvec;
+       size_t skip = i->iov_offset;
+       size_t remaining = i->count;
+       int total = 0;
+       int idx;
+
+       for (idx = 0; remaining && idx < i->nr_segs; idx++, seg++) {
+               size_t len = seg->bv_len - skip;
+
+               /* only the first segment can be partially consumed */
+               skip = 0;
+               if (unlikely(!len))     /* empty segment */
+                       continue;
+               if (++total >= maxpages)        /* don't bother going further */
+                       return maxpages;
+               remaining -= min_t(size_t, len, remaining);
+       }
+       return min(total, maxpages);
+}
+
+/*
+ * Copy data from @page into the iterator, using the implementation that
+ * matches the iterator's backing store (bio_vec or iovec).
+ */
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       if (!(i->type & ITER_BVEC))
+               return copy_page_to_iter_iovec(page, offset, bytes, i);
+
+       return copy_page_to_iter_bvec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+/*
+ * Copy data from the iterator into @page, using the implementation that
+ * matches the iterator's backing store (bio_vec or iovec).
+ */
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       if (!(i->type & ITER_BVEC))
+               return copy_page_from_iter_iovec(page, offset, bytes, i);
+
+       return copy_page_from_iter_bvec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+/*
+ * Copy @bytes from the iterator's current position into @page at @offset,
+ * using the implementation that matches the iterator's backing store.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       if (!(i->type & ITER_BVEC))
+               return copy_from_user_atomic_iovec(page, i, offset, bytes);
+
+       return copy_from_user_bvec(page, i, offset, bytes);
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+/*
+ * Move the iterator forward by @size bytes, using the implementation that
+ * matches the iterator's backing store.
+ */
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+       if (!(i->type & ITER_BVEC)) {
+               advance_iovec(i, size);
+               return;
+       }
+
+       advance_bvec(i, size);
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
 /*
  * Return the count of just the current iov_iter segment.
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
-       const struct iovec *iov = i->iov;
        if (i->nr_segs == 1)
                return i->count;
+       else if (i->type & ITER_BVEC)
+               return min(i->count, i->iov->iov_len - i->iov_offset);
        else
-               return min(i->count, iov->iov_len - i->iov_offset);
+               return min(i->count, i->bvec->bv_len - i->iov_offset);
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+/*
+ * Return the OR of the offsets/addresses and lengths of the data the
+ * iterator still covers, computed by the helper that matches its type.
+ */
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+       if (!(i->type & ITER_BVEC))
+               return alignment_iovec(i);
+
+       return alignment_bvec(i);
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+/*
+ * Get references to the pages behind the iterator's current segment into the
+ * caller-supplied @pages array, using the helper that matches its type.
+ */
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+                  struct page **pages, size_t maxsize,
+                  size_t *start)
+{
+       if (!(i->type & ITER_BVEC))
+               return get_pages_iovec(i, pages, maxsize, start);
+
+       return get_pages_bvec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+/*
+ * Like iov_iter_get_pages(), but the page array is allocated by the helper
+ * and returned through *@pages.
+ */
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       if (!(i->type & ITER_BVEC))
+               return get_pages_alloc_iovec(i, pages, maxsize, start);
+
+       return get_pages_alloc_bvec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+/*
+ * Return the number of pages the remaining data spans, capped at @maxpages,
+ * computed by the helper that matches the iterator's type.
+ */
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+       if (!(i->type & ITER_BVEC))
+               return iov_iter_npages_iovec(i, maxpages);
+
+       return iov_iter_npages_bvec(i, maxpages);
+}
+EXPORT_SYMBOL(iov_iter_npages);
index 58b50d2901fe2a43a916bae15daffb5c8c6f1cbe..243a9b76e5cee9257d499311c21153084980c39a 100644 (file)
@@ -264,9 +264,17 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                struct kiocb kiocb;
                struct file *swap_file = sis->swap_file;
                struct address_space *mapping = swap_file->f_mapping;
-               struct iovec iov = {
-                       .iov_base = kmap(page),
-                       .iov_len  = PAGE_SIZE,
+               struct bio_vec bv = {
+                       .bv_page = page,
+                       .bv_len  = PAGE_SIZE,
+                       .bv_offset = 0
+               };
+               struct iov_iter from = {
+                       .type = ITER_BVEC | WRITE,
+                       .count = PAGE_SIZE,
+                       .iov_offset = 0,
+                       .nr_segs = 1,
+                       .bvec = &bv
                };
 
                init_sync_kiocb(&kiocb, swap_file);
@@ -275,10 +283,9 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
                set_page_writeback(page);
                unlock_page(page);
-               ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
-                                               &kiocb, &iov,
-                                               kiocb.ki_pos, 1);
-               kunmap(page);
+               ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE,
+                                               &kiocb, &from,
+                                               kiocb.ki_pos);
                if (ret == PAGE_SIZE) {
                        count_vm_event(PSWPOUT);
                        ret = 0;
index 8505c9262b35853e22580c6c9b74c4d12bc86acc..5077afcd9e116b16b17c7b0ed51930d570f88701 100644 (file)
@@ -46,11 +46,7 @@ static int process_vm_rw_pages(struct page **pages,
                        copy = len;
 
                if (vm_write) {
-                       if (copy > iov_iter_count(iter))
-                               copy = iov_iter_count(iter);
-                       copied = iov_iter_copy_from_user(page, iter,
-                                       offset, copy);
-                       iov_iter_advance(iter, copied);
+                       copied = copy_page_from_iter(page, offset, copy, iter);
                        set_page_dirty_lock(page);
                } else {
                        copied = copy_page_to_iter(page, offset, copy, iter);
@@ -278,7 +274,7 @@ static ssize_t process_vm_rw(pid_t pid,
        if (rc <= 0)
                goto free_iovecs;
 
-       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+       iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 
        rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
                                   iovstack_r, &iov_r);
@@ -341,7 +337,7 @@ compat_process_vm_rw(compat_pid_t pid,
                                                  &iov_l);
        if (rc <= 0)
                goto free_iovecs;
-       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+       iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
        rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
                                          UIO_FASTIOV, iovstack_r,
                                          &iov_r);
index 5402481c28d190a83718f6b1897eec78df4c0209..f484c276e994923a5c05577b42d5a9dcc58ae7cc 100644 (file)
@@ -1406,8 +1406,7 @@ shmem_write_end(struct file *file, struct address_space *mapping,
        return copied;
 }
 
-static ssize_t shmem_file_aio_read(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
+static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
@@ -1416,15 +1415,8 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
        unsigned long offset;
        enum sgp_type sgp = SGP_READ;
        int error = 0;
-       ssize_t retval;
-       size_t count;
+       ssize_t retval = 0;
        loff_t *ppos = &iocb->ki_pos;
-       struct iov_iter iter;
-
-       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (retval)
-               return retval;
-       iov_iter_init(&iter, iov, nr_segs, count, 0);
 
        /*
         * Might this read be for a stacking filesystem?  Then when reading
@@ -1500,14 +1492,14 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
                 */
-               ret = copy_page_to_iter(page, offset, nr, &iter);
+               ret = copy_page_to_iter(page, offset, nr, to);
                retval += ret;
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
 
                page_cache_release(page);
-               if (!iov_iter_count(&iter))
+               if (!iov_iter_count(to))
                        break;
                if (ret < nr) {
                        error = -EFAULT;
@@ -2629,13 +2621,13 @@ static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
 #ifdef CONFIG_TMPFS
        .llseek         = shmem_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = shmem_file_aio_read,
-       .aio_write      = generic_file_aio_write,
+       .read           = new_sync_read,
+       .write          = new_sync_write,
+       .read_iter      = shmem_file_read_iter,
+       .write_iter     = generic_file_write_iter,
        .fsync          = noop_fsync,
        .splice_read    = shmem_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = shmem_fallocate,
 #endif
 };
index e01ded365440704dbec95f0ec8f56326d646b9c2..0f16ffe8eb67c6fcd0350add4a5a4b6092cb6905 100644 (file)
@@ -464,7 +464,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
         * stalls if we need to run get_block().  We could test
         * PagePrivate for that.
         *
-        * If this process is currently in __generic_file_aio_write() against
+        * If this process is currently in __generic_file_write_iter() against
         * this page's queue, we can perform writeback even if that
         * will block.
         *
index 815a2249cfa9371f1f9505c7016b509b83d50100..555013034f7a899e2d03268a6571e01d096e8cee 100644 (file)
@@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
                        set_page_dirty_lock(pages[i]);
                put_page(pages[i]);
        }
-       kfree(pages);
+       if (is_vmalloc_addr(pages))
+               vfree(pages);
+       else
+               kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
 
@@ -164,36 +167,6 @@ void ceph_copy_from_page_vector(struct page **pages,
 }
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
-/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
-                                        void __user *data,
-                                        loff_t off, size_t len)
-{
-       int i = 0;
-       int po = off & ~PAGE_CACHE_MASK;
-       int left = len;
-       int l, bad;
-
-       while (left > 0) {
-               l = min_t(int, left, PAGE_CACHE_SIZE-po);
-               bad = copy_to_user(data, page_address(pages[i]) + po, l);
-               if (bad == l)
-                       return -EFAULT;
-               data += l - bad;
-               left -= l - bad;
-               if (po) {
-                       po += l - bad;
-                       if (po == PAGE_CACHE_SIZE)
-                               po = 0;
-               }
-               i++;
-       }
-       return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
 /*
  * Zero an extent within a page vector.  Offset is relative to the
  * start of the first page.