From: Kent Overstreet Date: Sat, 6 Nov 2021 17:39:42 +0000 (-0400) Subject: bcachefs: Fallocate fixes X-Git-Tag: io_uring-6.7-2023-11-10~119^2~1300 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=74163da7c81ac5bb4ecd625f9e41a241ec5d8758;p=linux-block.git bcachefs: Fallocate fixes - fpunch wasn't always correctly updating i_size - when we drop buffered writes that were extending a file, we become responsible for writing i_size. - fzero was sometimes zeroing out more data that it should have - block_start and block_end were being rounded in the wrong directions Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 7de6b7a7aa60..12b785c5005f 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -2296,6 +2296,14 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, s->s[i].state = SECTOR_UNALLOCATED; } + /* + * Caller needs to know whether this page will be written out by + * writeback - doing an i_size update if necessary - or whether it will + * be responsible for the i_size update: + */ + ret = s->s[(min_t(u64, inode->v.i_size - (index << PAGE_SHIFT), + PAGE_SIZE) - 1) >> 9].state >= SECTOR_DIRTY; + zero_user_segment(page, start_offset, end_offset); /* @@ -2304,8 +2312,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, * XXX: because we aren't currently tracking whether the page has actual * data in it (vs. just 0s, or only partially written) this wrong. ick. */ - ret = bch2_get_page_disk_reservation(c, inode, page, false); - BUG_ON(ret); + BUG_ON(bch2_get_page_disk_reservation(c, inode, page, false)); /* * This removes any writeable userspace mappings; we need to force @@ -2327,6 +2334,20 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) from, round_up(from, PAGE_SIZE)); } +static int bch2_truncate_pages(struct bch_inode_info *inode, + loff_t start, loff_t end) +{ + int ret = __bch2_truncate_page(inode, start >> PAGE_SHIFT, + start, end); + + if (ret >= 0 && + start >> PAGE_SHIFT != end >> PAGE_SHIFT) + ret = __bch2_truncate_page(inode, + end >> PAGE_SHIFT, + start, end); + return ret; +} + static int bch2_extend(struct mnt_idmap *idmap, struct bch_inode_info *inode, struct bch_inode_unpacked *inode_u, @@ -2417,7 +2438,7 @@ int bch2_truncate(struct mnt_idmap *idmap, iattr->ia_valid &= ~ATTR_SIZE; ret = bch2_truncate_page(inode, iattr->ia_size); - if (unlikely(ret)) + if (unlikely(ret < 0)) goto err; /* @@ -2483,48 +2504,39 @@ static int inode_update_times_fn(struct bch_inode_info *inode, static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - u64 discard_start = round_up(offset, block_bytes(c)) >> 9; - u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; + u64 end = offset + len; + u64 block_start = round_up(offset, block_bytes(c)); + u64 block_end = round_down(end, block_bytes(c)); + bool truncated_last_page; int ret = 0; - inode_lock(&inode->v); - inode_dio_wait(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); - - ret = __bch2_truncate_page(inode, - offset >> PAGE_SHIFT, - offset, offset + len); - if (unlikely(ret)) + ret = bch2_truncate_pages(inode, offset, end); + if (unlikely(ret < 0)) goto err; - if (offset >> PAGE_SHIFT != - (offset + len) >> PAGE_SHIFT) { - ret = __bch2_truncate_page(inode, - (offset + len) >> PAGE_SHIFT, - offset, offset + len); - if (unlikely(ret)) - goto err; - } + truncated_last_page = ret; - truncate_pagecache_range(&inode->v, offset, offset + len - 1); + truncate_pagecache_range(&inode->v, offset, end - 1); - if (discard_start < discard_end) { + if (block_start < block_end ) { s64 i_sectors_delta = 0; ret = bch2_fpunch(c, inode_inum(inode), - discard_start, discard_end, + block_start >> 9, block_end >> 9, &i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta); } mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_MTIME|ATTR_CTIME) ?: ret; + if (end >= inode->v.i_size && !truncated_last_page) { + ret = bch2_write_inode_size(c, inode, inode->v.i_size, + ATTR_MTIME|ATTR_CTIME); + } else { + ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_MTIME|ATTR_CTIME); + } mutex_unlock(&inode->ei_update_lock); err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); - inode_unlock(&inode->v); - return ret; } @@ -2544,31 +2556,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, if ((offset | len) & (block_bytes(c) - 1)) return -EINVAL; - /* - * We need i_mutex to keep the page cache consistent with the extents - * btree, and the btree consistent with i_size - we don't need outside - * locking for the extents btree itself, because we're using linked - * iterators - */ - inode_lock(&inode->v); - inode_dio_wait(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); - if (insert) { - ret = -EFBIG; if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) - goto err; + return -EFBIG; - ret = -EINVAL; if (offset >= inode->v.i_size) - goto err; + return -EINVAL; src_start = U64_MAX; shift = len; } else { - ret = -EINVAL; if (offset + len >= inode->v.i_size) - goto err; + return -EINVAL; src_start = offset + len; shift = -len; @@ -2578,7 +2577,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); if (ret) - goto err; + return ret; if (insert) { i_size_write(&inode->v, new_size); @@ -2595,7 +2594,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, i_sectors_acct(c, inode, NULL, i_sectors_delta); if (ret) - goto err; + return ret; } bch2_bkey_buf_init(©); @@ -2708,18 +2707,19 @@ reassemble: bch2_bkey_buf_exit(©, c); if (ret) - goto err; + return ret; + mutex_lock(&inode->ei_update_lock); if (!insert) { i_size_write(&inode->v, new_size); - mutex_lock(&inode->ei_update_lock); ret = bch2_write_inode_size(c, inode, new_size, ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); + } else { + /* We need an inode update to update bi_journal_seq for fsync: */ + ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_MTIME|ATTR_CTIME); } -err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); - inode_unlock(&inode->v); + mutex_unlock(&inode->ei_update_lock); return ret; } @@ -2814,6 +2814,17 @@ bkey_err: if (ret == -EINTR) ret = 0; } + + if (ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)) { + struct quota_res quota_res = { 0 }; + s64 i_sectors_delta = 0; + + bch2_fpunch_at(&trans, &iter, inode_inum(inode), + end_sector, &i_sectors_delta); + i_sectors_acct(c, inode, "a_res, i_sectors_delta); + bch2_quota_reservation_put(c, inode, "a_res); + } + bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); return ret; @@ -2822,77 +2833,58 @@ bkey_err: static long bchfs_fallocate(struct bch_inode_info *inode, int mode, loff_t offset, loff_t len) { - struct address_space *mapping = inode->v.i_mapping; struct bch_fs *c = inode->v.i_sb->s_fs_info; - loff_t end = offset + len; - loff_t block_start = round_down(offset, block_bytes(c)); - loff_t block_end = round_up(end, block_bytes(c)); - int ret; - - inode_lock(&inode->v); - inode_dio_wait(&inode->v); - bch2_pagecache_block_get(&inode->ei_pagecache_lock); + u64 end = offset + len; + u64 block_start = round_down(offset, block_bytes(c)); + u64 block_end = round_up(end, block_bytes(c)); + bool truncated_last_page = false; + int ret, ret2 = 0; if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { ret = inode_newsize_ok(&inode->v, end); if (ret) - goto err; + return ret; } if (mode & FALLOC_FL_ZERO_RANGE) { - ret = __bch2_truncate_page(inode, - offset >> PAGE_SHIFT, - offset, end); - - if (!ret && - offset >> PAGE_SHIFT != end >> PAGE_SHIFT) - ret = __bch2_truncate_page(inode, - end >> PAGE_SHIFT, - offset, end); + ret = bch2_truncate_pages(inode, offset, end); + if (unlikely(ret < 0)) + return ret; - if (unlikely(ret)) - goto err; + truncated_last_page = ret; truncate_pagecache_range(&inode->v, offset, end - 1); + + block_start = round_up(offset, block_bytes(c)); + block_end = round_down(end, block_bytes(c)); } ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); - if (ret) - goto err; /* - * Do we need to extend the file? - * - * If we zeroed up to the end of the file, we dropped whatever writes - * were going to write out the current i_size, so we have to extend - * manually even if FL_KEEP_SIZE was set: + * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update, + * so that the VFS cache i_size is consistent with the btree i_size: */ - if (end >= inode->v.i_size && - (!(mode & FALLOC_FL_KEEP_SIZE) || - (mode & FALLOC_FL_ZERO_RANGE))) { + if (ret && + !(ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE))) + return ret; - /* - * Sync existing appends before extending i_size, - * as in bch2_extend(): - */ - ret = filemap_write_and_wait_range(mapping, - inode->ei_inode.bi_size, S64_MAX); - if (ret) - goto err; + if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size) + end = inode->v.i_size; - if (mode & FALLOC_FL_KEEP_SIZE) - end = inode->v.i_size; - else - i_size_write(&inode->v, end); + if (end >= inode->v.i_size && + (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || + !(mode & FALLOC_FL_KEEP_SIZE))) { + spin_lock(&inode->v.i_lock); + i_size_write(&inode->v, end); + spin_unlock(&inode->v.i_lock); mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode_size(c, inode, end, 0); + ret2 = bch2_write_inode_size(c, inode, end, 0); mutex_unlock(&inode->ei_update_lock); } -err: - bch2_pagecache_block_put(&inode->ei_pagecache_lock); - inode_unlock(&inode->v); - return ret; + + return ret ?: ret2; } long bch2_fallocate_dispatch(struct file *file, int mode, @@ -2905,6 +2897,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode, if (!percpu_ref_tryget(&c->writes)) return -EROFS; + inode_lock(&inode->v); + inode_dio_wait(&inode->v); + bch2_pagecache_block_get(&inode->ei_pagecache_lock); + if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) ret = bchfs_fallocate(inode, mode, offset, len); else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) @@ -2916,6 +2912,9 @@ long bch2_fallocate_dispatch(struct file *file, int mode, else ret = -EOPNOTSUPP; + + bch2_pagecache_block_put(&inode->ei_pagecache_lock); + inode_unlock(&inode->v); percpu_ref_put(&c->writes); return ret;