iomap: Add done_before argument to iomap_dio_rw
[linux-block.git] / fs / gfs2 / file.c
index c559827cb6f915a505fa225fa9a84bea71901923..f772ee0fcae3daa132ab1399aa1598369d0c1530 100644 (file)
@@ -776,6 +776,36 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
        return ret ? ret : ret1;
 }
 
+/**
+ * should_fault_in_pages - decide whether to fault in user pages and retry
+ * @ret: result of the I/O attempt that ran with page faults disabled
+ * @i: iterator describing what is left of the request
+ * @prev_count: bytes that were left in @i on the previous call (updated)
+ * @window_size: set to the number of bytes the caller should fault in
+ *
+ * Returns false when @i is empty, when @ret is zero or an error other than
+ * -EFAULT, or when @i is not an iovec iterator.  Otherwise records the
+ * remaining byte count in @prev_count, sets @window_size and returns true.
+ *
+ * When the remaining count differs from @prev_count, or when no window has
+ * been set yet, the window covers up to BIO_MAX_VECS pages, limited by the
+ * number of pages left in @i and by current->nr_dirtied_pause -
+ * current->nr_dirtied (but at least one page).  When the count is unchanged
+ * since the previous call, the window falls back to a single page.  In both
+ * cases the window is shortened by the offset of the current user address
+ * within its page.
+ */
+static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
+                                        size_t *prev_count,
+                                        size_t *window_size)
+{
+       char __user *p;
+       size_t count = iov_iter_count(i);
+       int pages = 1;
+
+       if (likely(!count))
+               return false;
+       if (ret <= 0 && ret != -EFAULT)
+               return false;
+       if (!iter_is_iovec(i))
+               return false;
+
+       /* Only look at the iovec array once we know @i is iovec-backed. */
+       p = i->iov[0].iov_base + i->iov_offset;
+
+       if (*prev_count != count || !*window_size) {
+               int nr_dirtied;
+
+               /*
+                * Assign to the function-scope 'pages'; declaring a second
+                * 'pages' in this block would discard the result and leave
+                * the window at a single page.
+                */
+               pages = min_t(int, BIO_MAX_VECS,
+                             DIV_ROUND_UP(count, PAGE_SIZE));
+               nr_dirtied = max(current->nr_dirtied_pause -
+                                current->nr_dirtied, 1);
+               pages = min(pages, nr_dirtied);
+       }
+
+       *prev_count = count;
+       *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
+       return true;
+}
+
 static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
                                     struct gfs2_holder *gh)
 {
@@ -792,7 +822,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
        if (ret)
                goto out_uninit;
 
-       ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0);
+       ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
        gfs2_glock_dq(gh);
 out_uninit:
        gfs2_holder_uninit(gh);
@@ -826,7 +856,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
        if (offset + len > i_size_read(&ip->i_inode))
                goto out;
 
-       ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0);
+       ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
        if (ret == -ENOTBLK)
                ret = 0;
 out:
@@ -840,9 +870,17 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
        struct gfs2_inode *ip;
        struct gfs2_holder gh;
+       size_t prev_count = 0, window_size = 0;
        size_t written = 0;
        ssize_t ret;
 
+       /*
+        * In this function, we disable page faults when we're holding the
+        * inode glock while doing I/O.  If a page fault occurs, we indicate
+        * that the inode glock may be dropped, fault in the pages manually,
+        * and retry.
+        */
+
        if (iocb->ki_flags & IOCB_DIRECT) {
                ret = gfs2_file_direct_read(iocb, to, &gh);
                if (likely(ret != -ENOTBLK))
@@ -864,18 +902,118 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        }
        ip = GFS2_I(iocb->ki_filp->f_mapping->host);
        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+retry:
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;
+retry_under_glock:
+       pagefault_disable();
        ret = generic_file_read_iter(iocb, to);
+       pagefault_enable();
        if (ret > 0)
                written += ret;
-       gfs2_glock_dq(&gh);
+
+       if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
+               size_t leftover;
+
+               gfs2_holder_allow_demote(&gh);
+               leftover = fault_in_iov_iter_writeable(to, window_size);
+               gfs2_holder_disallow_demote(&gh);
+               if (leftover != window_size) {
+                       if (!gfs2_holder_queued(&gh)) {
+                               if (written)
+                                       goto out_uninit;
+                               goto retry;
+                       }
+                       goto retry_under_glock;
+               }
+       }
+       if (gfs2_holder_queued(&gh))
+               gfs2_glock_dq(&gh);
 out_uninit:
        gfs2_holder_uninit(&gh);
        return written ? written : ret;
 }
 
+/**
+ * gfs2_file_buffered_write - buffered write with page faults disabled
+ * @iocb: the io context; ki_pos is advanced by the number of bytes written
+ * @from: the data to write
+ * @gh: holder for the inode glock; it is initialized here (LM_ST_EXCLUSIVE)
+ *      and uninitialized again before returning
+ *
+ * When the inode is the rindex inode, the statfs inode glock is additionally
+ * held exclusively around each write attempt.
+ *
+ * Returns the number of bytes written if any were written; otherwise the
+ * result of the last attempt, or -ENOMEM if the statfs holder could not be
+ * allocated.
+ */
+static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
+                                       struct iov_iter *from,
+                                       struct gfs2_holder *gh)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       struct gfs2_holder *statfs_gh = NULL;
+       size_t prev_count = 0, window_size = 0;
+       size_t written = 0;
+       ssize_t ret;
+
+       /*
+        * In this function, we disable page faults when we're holding the
+        * inode glock while doing I/O.  If a page fault occurs, we indicate
+        * that the inode glock may be dropped, fault in the pages manually,
+        * and retry.
+        */
+
+       if (inode == sdp->sd_rindex) {
+               statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS);
+               if (!statfs_gh)
+                       return -ENOMEM;
+       }
+
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
+retry:
+       ret = gfs2_glock_nq(gh);
+       if (ret)
+               goto out_uninit;
+retry_under_glock:
+       if (inode == sdp->sd_rindex) {
+               struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+               ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
+                                        GL_NOCACHE, statfs_gh);
+               if (ret)
+                       goto out_unlock;
+       }
+
+       current->backing_dev_info = inode_to_bdi(inode);
+       pagefault_disable();
+       ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+       pagefault_enable();
+       current->backing_dev_info = NULL;
+       if (ret > 0) {
+               iocb->ki_pos += ret;
+               written += ret;
+       }
+
+       /* The statfs glock is released before any manual fault-in. */
+       if (inode == sdp->sd_rindex)
+               gfs2_glock_dq_uninit(statfs_gh);
+
+       if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
+               size_t leftover;
+
+               gfs2_holder_allow_demote(gh);
+               leftover = fault_in_iov_iter_readable(from, window_size);
+               gfs2_holder_disallow_demote(gh);
+               /* Retry only if at least part of the window was faulted in. */
+               if (leftover != window_size) {
+                       if (!gfs2_holder_queued(gh)) {
+                               /*
+                                * The holder is no longer queued.  Return a
+                                * short write if there was progress,
+                                * otherwise queue the holder again.
+                                */
+                               if (written)
+                                       goto out_uninit;
+                               goto retry;
+                       }
+                       goto retry_under_glock;
+               }
+       }
+out_unlock:
+       if (gfs2_holder_queued(gh))
+               gfs2_glock_dq(gh);
+out_uninit:
+       gfs2_holder_uninit(gh);
+       /* kfree(NULL) is a no-op, so no NULL check is needed. */
+       kfree(statfs_gh);
+       return written ? written : ret;
+}
+
 /**
  * gfs2_file_write_iter - Perform a write to a file
  * @iocb: The io context
@@ -927,9 +1065,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                        goto out_unlock;
 
                iocb->ki_flags |= IOCB_DSYNC;
-               current->backing_dev_info = inode_to_bdi(inode);
-               buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
-               current->backing_dev_info = NULL;
+               buffered = gfs2_file_buffered_write(iocb, from, &gh);
                if (unlikely(buffered <= 0)) {
                        if (!ret)
                                ret = buffered;
@@ -943,7 +1079,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                 * the direct I/O range as we don't know if the buffered pages
                 * made it to disk.
                 */
-               iocb->ki_pos += buffered;
                ret2 = generic_write_sync(iocb, buffered);
                invalidate_mapping_pages(mapping,
                                (iocb->ki_pos - buffered) >> PAGE_SHIFT,
@@ -951,13 +1086,9 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
                if (!ret || ret2 > 0)
                        ret += ret2;
        } else {
-               current->backing_dev_info = inode_to_bdi(inode);
-               ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
-               current->backing_dev_info = NULL;
-               if (likely(ret > 0)) {
-                       iocb->ki_pos += ret;
+               ret = gfs2_file_buffered_write(iocb, from, &gh);
+               if (likely(ret > 0))
                        ret = generic_write_sync(iocb, ret);
-               }
        }
 
 out_unlock: