exfat: change to get file size from DataLength
author Yuezhang Mo <Yuezhang.Mo@sony.com>
Mon, 13 Mar 2023 04:38:53 +0000 (12:38 +0800)
committer Namjae Jeon <linkinjeon@kernel.org>
Mon, 8 Jan 2024 12:57:22 +0000 (21:57 +0900)
In the stream extension directory entry, the ValidDataLength
field describes how far into the data stream user data has
been written, and the DataLength field describes the file
size.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
fs/exfat/exfat_fs.h
fs/exfat/file.c
fs/exfat/inode.c
fs/exfat/namei.c

index 8030780a199b4448dd64d786063866c581d33bc8..9474cd50da6d4fd8b9fba92f1f3d8717f19245dc 100644 (file)
@@ -207,6 +207,7 @@ struct exfat_dir_entry {
        unsigned char flags;
        unsigned short attr;
        loff_t size;
+       loff_t valid_size;
        unsigned int num_subdirs;
        struct timespec64 atime;
        struct timespec64 mtime;
@@ -316,6 +317,7 @@ struct exfat_inode_info {
        loff_t i_size_aligned;
        /* on-disk position of directory entry or 0 */
        loff_t i_pos;
+       loff_t valid_size;
        /* hash by i_location */
        struct hlist_node i_hash_fat;
        /* protect bmap against truncate */
index bfdfafe0099309f59da5c19b27eee2f4ed10db31..270e2f934124e0edca8923859426a15875615db5 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/msdos_fs.h>
+#include <linux/writeback.h>
 
 #include "exfat_raw.h"
 #include "exfat_fs.h"
@@ -26,6 +27,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
                return err;
 
        inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+       EXFAT_I(inode)->valid_size = size;
        mark_inode_dirty(inode);
 
        if (!IS_SYNC(inode))
@@ -146,6 +148,9 @@ int __exfat_truncate(struct inode *inode)
                ei->start_clu = EXFAT_EOF_CLUSTER;
        }
 
+       if (i_size_read(inode) < ei->valid_size)
+               ei->valid_size = i_size_read(inode);
+
        if (ei->type == TYPE_FILE)
                ei->attr |= EXFAT_ATTR_ARCHIVE;
 
@@ -474,15 +479,124 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
        return blkdev_issue_flush(inode->i_sb->s_bdev);
 }
 
+/*
+ * Zero the byte range [start, end) of @file through its address space.
+ *
+ * The range is handled in chunks that never cross a page boundary; each
+ * chunk is obtained with ->write_begin(), cleared with zero_user_segment()
+ * and committed with ->write_end().
+ *
+ * Returns 0 on success (an empty range is a successful no-op), or the
+ * negative error reported by ->write_begin() or ->write_end().
+ */
+static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
+{
+       /* Must be initialized: the loop body does not run when start >= end. */
+       int err = 0;
+       struct inode *inode = file_inode(file);
+       struct address_space *mapping = inode->i_mapping;
+       const struct address_space_operations *ops = mapping->a_ops;
+
+       while (start < end) {
+               u32 zerofrom, len;
+               struct page *page = NULL;
+
+               /* Clamp the chunk to the rest of this page and to @end. */
+               zerofrom = start & (PAGE_SIZE - 1);
+               len = PAGE_SIZE - zerofrom;
+               if (start + len > end)
+                       len = end - start;
+
+               err = ops->write_begin(file, mapping, start, len, &page, NULL);
+               if (err)
+                       goto out;
+
+               zero_user_segment(page, zerofrom, zerofrom + len);
+
+               err = ops->write_end(file, mapping, start, len, len, page, NULL);
+               if (err < 0)
+                       goto out;
+
+               /*
+                * A non-negative ->write_end() result is a byte count, not
+                * an error; do not let it leak out as the return value.
+                */
+               err = 0;
+               start += len;
+
+               balance_dirty_pages_ratelimited(mapping);
+               cond_resched();
+       }
+
+out:
+       return err;
+}
+
+/*
+ * ->write_iter() for exFAT regular files.
+ *
+ * Under the inode lock: validate the request, zero the gap between the
+ * inode's valid_size and the write position when the write starts beyond
+ * valid_size, then perform the write with __generic_file_write_iter().
+ * After the lock is dropped, a dsync request flushes everything from the
+ * lower of the write position and the old valid_size up to the new
+ * iocb->ki_pos, so the zeroed gap is flushed together with the data.
+ *
+ * Returns the number of bytes written, 0 when there is nothing to write,
+ * or a negative error.
+ */
+static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       ssize_t ret;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct exfat_inode_info *ei = EXFAT_I(inode);
+       loff_t pos;
+       loff_t valid_size;
+
+       inode_lock(inode);
+
+       valid_size = ei->valid_size;
+
+       /*
+        * A result of 0 means there is nothing to write; bail out so that
+        * an empty write neither zeroes the gap nor extends the file.
+        */
+       ret = generic_write_checks(iocb, iter);
+       if (ret <= 0)
+               goto unlock;
+
+       /*
+        * Sample the position only after the checks: they may move
+        * iocb->ki_pos (an append write is repositioned to the file size),
+        * and the gap must be measured against where the data will land.
+        */
+       pos = iocb->ki_pos;
+
+       if (pos > valid_size) {
+               ret = exfat_file_zeroed_range(file, valid_size, pos);
+               /* Running out of space is an expected failure; don't log it. */
+               if (ret < 0 && ret != -ENOSPC) {
+                       exfat_err(inode->i_sb,
+                               "write: fail to zero from %llu to %llu(%zd)",
+                               valid_size, pos, ret);
+               }
+               if (ret < 0)
+                       goto unlock;
+       }
+
+       ret = __generic_file_write_iter(iocb, iter);
+       if (ret < 0)
+               goto unlock;
+
+       inode_unlock(inode);
+
+       /* Include the zeroed gap in the range that gets synced. */
+       if (pos > valid_size)
+               pos = valid_size;
+
+       if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) {
+               ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1,
+                               iocb->ki_flags & IOCB_SYNC);
+               if (err < 0)
+                       return err;
+       }
+
+       return ret;
+
+unlock:
+       inode_unlock(inode);
+
+       return ret;
+}
+
+/*
+ * ->mmap() for exFAT regular files.
+ *
+ * For a writable mapping whose range (clamped to the file size) reaches
+ * past the inode's valid_size, zero [valid_size, end) before handing the
+ * mapping over to generic_file_mmap().
+ *
+ * Returns the result of generic_file_mmap(), -EAGAIN when the inode lock
+ * cannot be taken without blocking, or the negative error from zeroing.
+ */
+static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       int ret;
+       struct inode *inode = file_inode(file);
+       struct exfat_inode_info *ei = EXFAT_I(inode);
+       loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+       loff_t end = min_t(loff_t, i_size_read(inode),
+                       start + vma->vm_end - vma->vm_start);
+
+       if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) {
+               loff_t valid_size;
+
+               /*
+                * The write path reads and advances valid_size under the
+                * inode lock, so the zeroing must hold it as well. Only
+                * try-lock here: NOTE(review) ->mmap() is presumably entered
+                * with the mm's mmap lock held, so sleeping on the inode
+                * lock could invert the order used by the write path.
+                */
+               if (!inode_trylock(inode))
+                       return -EAGAIN;
+
+               /* Re-read under the lock; a writer may have advanced it. */
+               valid_size = ei->valid_size;
+               ret = 0;
+               if (valid_size < end)
+                       ret = exfat_file_zeroed_range(file, valid_size, end);
+
+               inode_unlock(inode);
+
+               if (ret < 0) {
+                       /* Report the range that was actually being zeroed. */
+                       exfat_err(inode->i_sb,
+                                 "mmap: fail to zero from %llu to %llu(%d)",
+                                 valid_size, end, ret);
+                       return ret;
+               }
+       }
+
+       return generic_file_mmap(file, vma);
+}
+
 const struct file_operations exfat_file_operations = {
        .llseek         = generic_file_llseek,
        .read_iter      = generic_file_read_iter,
-       .write_iter     = generic_file_write_iter,
+       .write_iter     = exfat_file_write_iter,
        .unlocked_ioctl = exfat_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = exfat_compat_ioctl,
 #endif
-       .mmap           = generic_file_mmap,
+       .mmap           = exfat_file_mmap,
        .fsync          = exfat_file_fsync,
        .splice_read    = filemap_splice_read,
        .splice_write   = iter_file_splice_write,
index e7ff58b8e68c78d53c01c8126cd1b6276c632a8c..b02677c9fd459ce19bdd389c1c00a03a7001eaa8 100644 (file)
@@ -75,8 +75,8 @@ int __exfat_write_inode(struct inode *inode, int sync)
        if (ei->start_clu == EXFAT_EOF_CLUSTER)
                on_disk_size = 0;
 
-       ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_size);
-       ep2->dentry.stream.size = ep2->dentry.stream.valid_size;
+       ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
+       ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
        if (on_disk_size) {
                ep2->dentry.stream.flags = ei->flags;
                ep2->dentry.stream.start_clu = cpu_to_le32(ei->start_clu);
@@ -278,6 +278,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
        unsigned int cluster, sec_offset;
        sector_t last_block;
        sector_t phys = 0;
+       sector_t valid_blks;
        loff_t pos;
 
        mutex_lock(&sbi->s_lock);
@@ -306,17 +307,32 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
        mapped_blocks = sbi->sect_per_clus - sec_offset;
        max_blocks = min(mapped_blocks, max_blocks);
 
-       /* Treat newly added block / cluster */
-       if (iblock < last_block)
-               create = 0;
-
-       if (create || buffer_delay(bh_result)) {
-               pos = EXFAT_BLK_TO_B((iblock + 1), sb);
+       pos = EXFAT_BLK_TO_B((iblock + 1), sb);
+       if ((create && iblock >= last_block) || buffer_delay(bh_result)) {
                if (ei->i_size_ondisk < pos)
                        ei->i_size_ondisk = pos;
        }
 
+       map_bh(bh_result, sb, phys);
+       if (buffer_delay(bh_result))
+               clear_buffer_delay(bh_result);
+
        if (create) {
+               valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
+
+               if (iblock + max_blocks < valid_blks) {
+                       /* The range has been written, map it */
+                       goto done;
+               } else if (iblock < valid_blks) {
+                       /*
+                        * The range has been partially written,
+                        * map the written part.
+                        */
+                       max_blocks = valid_blks - iblock;
+                       goto done;
+               }
+
+               /* The area has not been written, map and mark as new. */
                err = exfat_map_new_buffer(ei, bh_result, pos);
                if (err) {
                        exfat_fs_error(sb,
@@ -324,11 +340,55 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
                                        pos, ei->i_size_aligned);
                        goto unlock_ret;
                }
+       } else {
+               valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
+
+               if (iblock + max_blocks < valid_blks) {
+                       /* The range has been written, map it */
+                       goto done;
+               } else if (iblock < valid_blks) {
+                       /*
+                        * The area has been partially written,
+                        * map the written part.
+                        */
+                       max_blocks = valid_blks - iblock;
+                       goto done;
+               } else if (iblock == valid_blks &&
+                          (ei->valid_size & (sb->s_blocksize - 1))) {
+                       /*
+                        * The block has been partially written,
+                        * zero the unwritten part and map the block.
+                        */
+                       loff_t size, off;
+
+                       max_blocks = 1;
+
+                       /*
+                        * For direct read, the unwritten part will be zeroed in
+                        * exfat_direct_IO()
+                        */
+                       if (!bh_result->b_folio)
+                               goto done;
+
+                       pos -= sb->s_blocksize;
+                       size = ei->valid_size - pos;
+                       off = pos & (PAGE_SIZE - 1);
+
+                       folio_set_bh(bh_result, bh_result->b_folio, off);
+                       err = bh_read(bh_result, 0);
+                       if (err < 0)
+                               goto unlock_ret;
+
+                       folio_zero_segment(bh_result->b_folio, off + size,
+                                       off + sb->s_blocksize);
+               } else {
+                       /*
+                        * The range has not been written, clear the mapped flag
+                        * to only zero the cache and do not read from disk.
+                        */
+                       clear_buffer_mapped(bh_result);
+               }
        }
-
-       if (buffer_delay(bh_result))
-               clear_buffer_delay(bh_result);
-       map_bh(bh_result, sb, phys);
 done:
        bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
 unlock_ret:
@@ -343,6 +403,17 @@ static int exfat_read_folio(struct file *file, struct folio *folio)
 
 static void exfat_readahead(struct readahead_control *rac)
 {
+       struct address_space *mapping = rac->mapping;
+       struct inode *inode = mapping->host;
+       struct exfat_inode_info *ei = EXFAT_I(inode);
+       loff_t pos = readahead_pos(rac);
+
+       /* Range cross valid_size, read it page by page. */
+       if (ei->valid_size < i_size_read(inode) &&
+           pos <= ei->valid_size &&
+           ei->valid_size < pos + readahead_length(rac))
+               return;
+
        mpage_readahead(rac, exfat_get_block);
 }
 
@@ -370,9 +441,7 @@ static int exfat_write_begin(struct file *file, struct address_space *mapping,
        int ret;
 
        *pagep = NULL;
-       ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
-                              exfat_get_block,
-                              &EXFAT_I(mapping->host)->i_size_ondisk);
+       ret = block_write_begin(mapping, pos, len, pagep, exfat_get_block);
 
        if (ret < 0)
                exfat_write_failed(mapping, pos+len);
@@ -400,6 +469,11 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
        if (err < len)
                exfat_write_failed(mapping, pos+len);
 
+       if (!(err < 0) && pos + err > ei->valid_size) {
+               ei->valid_size = pos + err;
+               mark_inode_dirty(inode);
+       }
+
        if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) {
                inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
                ei->attr |= EXFAT_ATTR_ARCHIVE;
@@ -413,6 +487,8 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct address_space *mapping = iocb->ki_filp->f_mapping;
        struct inode *inode = mapping->host;
+       struct exfat_inode_info *ei = EXFAT_I(inode);
+       loff_t pos = iocb->ki_pos;
        loff_t size = iocb->ki_pos + iov_iter_count(iter);
        int rw = iov_iter_rw(iter);
        ssize_t ret;
@@ -436,8 +512,21 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
         * condition of exfat_get_block() and ->truncate().
         */
        ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
-       if (ret < 0 && (rw & WRITE))
-               exfat_write_failed(mapping, size);
+       if (ret < 0) {
+               if (rw == WRITE)
+                       exfat_write_failed(mapping, size);
+
+               if (ret != -EIOCBQUEUED)
+                       return ret;
+       } else
+               size = pos + ret;
+
+       /* zero the unwritten part in the partially written block */
+       if (rw == READ && pos < ei->valid_size && ei->valid_size < size) {
+               iov_iter_revert(iter, size - ei->valid_size);
+               iov_iter_zero(size - ei->valid_size, iter);
+       }
+
        return ret;
 }
 
@@ -537,6 +626,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
        ei->start_clu = info->start_clu;
        ei->flags = info->flags;
        ei->type = info->type;
+       ei->valid_size = info->valid_size;
 
        ei->version = 0;
        ei->hint_stat.eidx = 0;
index 5d737e0b639a14747ac6ba15f1cdd72163818c2a..9c549fd11fc847055a35f7a5872dc044b1576796 100644 (file)
@@ -406,6 +406,7 @@ static int exfat_find_empty_entry(struct inode *inode,
                i_size_write(inode, size);
                ei->i_size_ondisk += sbi->cluster_size;
                ei->i_size_aligned += sbi->cluster_size;
+               ei->valid_size += sbi->cluster_size;
                ei->flags = p_dir->flags;
                inode->i_blocks += sbi->cluster_size >> 9;
        }
@@ -558,6 +559,8 @@ static int exfat_add_entry(struct inode *inode, const char *path,
                info->size = clu_size;
                info->num_subdirs = EXFAT_MIN_SUBDIR;
        }
+       info->valid_size = info->size;
+
        memset(&info->crtime, 0, sizeof(info->crtime));
        memset(&info->mtime, 0, sizeof(info->mtime));
        memset(&info->atime, 0, sizeof(info->atime));
@@ -660,6 +663,8 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
        info->type = exfat_get_entry_type(ep);
        info->attr = le16_to_cpu(ep->dentry.file.attr);
        info->size = le64_to_cpu(ep2->dentry.stream.valid_size);
+       info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
+       info->size = le64_to_cpu(ep2->dentry.stream.size);
        if (info->size == 0) {
                info->flags = ALLOC_NO_FAT_CHAIN;
                info->start_clu = EXFAT_EOF_CLUSTER;
@@ -1288,6 +1293,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
                        }
 
                        i_size_write(new_inode, 0);
+                       new_ei->valid_size = 0;
                        new_ei->start_clu = EXFAT_EOF_CLUSTER;
                        new_ei->flags = ALLOC_NO_FAT_CHAIN;
                }