fs/ext4/super.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/fs/ext4/super.c
   4  *
   5  * Copyright (C) 1992, 1993, 1994, 1995
   6  * Remy Card (card@masi.ibp.fr)
   7  * Laboratoire MASI - Institut Blaise Pascal
   8  * Universite Pierre et Marie Curie (Paris VI)
   9  *
  10  *  from
  11  *
  12  *  linux/fs/minix/inode.c
  13  *
  14  *  Copyright (C) 1991, 1992  Linus Torvalds
  15  *
  16  *  Big-endian to little-endian byte-swapping/bitmaps by
  17  *        David S. Miller (davem@caip.rutgers.edu), 1995
  18  */
  19
  20 #include <linux/module.h>
  21 #include <linux/string.h>
  22 #include <linux/fs.h>
  23 #include <linux/time.h>
  24 #include <linux/vmalloc.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/blkdev.h>
  28 #include <linux/backing-dev.h>
  29 #include <linux/parser.h>
  30 #include <linux/buffer_head.h>
  31 #include <linux/exportfs.h>
  32 #include <linux/vfs.h>
  33 #include <linux/random.h>
  34 #include <linux/mount.h>
  35 #include <linux/namei.h>
  36 #include <linux/quotaops.h>
  37 #include <linux/seq_file.h>
  38 #include <linux/ctype.h>
  39 #include <linux/log2.h>
  40 #include <linux/crc16.h>
  41 #include <linux/dax.h>
  42 #include <linux/uaccess.h>
  43 #include <linux/iversion.h>
  44 #include <linux/unicode.h>
  45 #include <linux/part_stat.h>
  46 #include <linux/kthread.h>
  47 #include <linux/freezer.h>
  48 #include <linux/fsnotify.h>
  49 #include <linux/fs_context.h>
  50 #include <linux/fs_parser.h>
  51
  52 #include "ext4.h"
  53 #include "ext4_extents.h"       /* Needed for trace points definition */
  54 #include "ext4_jbd2.h"
  55 #include "xattr.h"
  56 #include "acl.h"
  57 #include "mballoc.h"
  58 #include "fsmap.h"
  59
  60 #define CREATE_TRACE_POINTS
  61 #include <trace/events/ext4.h>
  62
  63 static struct ext4_lazy_init *ext4_li_info;
  64 static DEFINE_MUTEX(ext4_li_mtx);
  65 static struct ratelimit_state ext4_mount_msg_ratelimit;
  66
  67 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  68                              unsigned long journal_devnum);
  69 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  70 static void ext4_update_super(struct super_block *sb);
  71 static int ext4_commit_super(struct super_block *sb);
  72 static int ext4_mark_recovery_complete(struct super_block *sb,
  73                                         struct ext4_super_block *es);
  74 static int ext4_clear_journal_err(struct super_block *sb,
  75                                   struct ext4_super_block *es);
  76 static int ext4_sync_fs(struct super_block *sb, int wait);
  77 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  78 static int ext4_unfreeze(struct super_block *sb);
  79 static int ext4_freeze(struct super_block *sb);
  80 static inline int ext2_feature_set_ok(struct super_block *sb);
  81 static inline int ext3_feature_set_ok(struct super_block *sb);
  82 static void ext4_destroy_lazyinit_thread(void);
  83 static void ext4_unregister_li_request(struct super_block *sb);
  84 static void ext4_clear_request_list(void);
  85 static struct inode *ext4_get_journal_inode(struct super_block *sb,
  86                                             unsigned int journal_inum);
  87 static int ext4_validate_options(struct fs_context *fc);
  88 static int ext4_check_opt_consistency(struct fs_context *fc,
  89                                       struct super_block *sb);
  90 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
  91 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
  92 static int ext4_get_tree(struct fs_context *fc);
  93 static int ext4_reconfigure(struct fs_context *fc);
  94 static void ext4_fc_free(struct fs_context *fc);
  95 static int ext4_init_fs_context(struct fs_context *fc);
  96 static const struct fs_parameter_spec ext4_param_specs[];
  97
  98 /*
  99  * Lock ordering
 100  *
 101  * page fault path:
 102  * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 103  *   -> page lock -> i_data_sem (rw)
 104  *
 105  * buffered write path:
 106  * sb_start_write -> i_mutex -> mmap_lock
 107  * sb_start_write -> i_mutex -> transaction start -> page lock ->
 108  *   i_data_sem (rw)
 109  *
 110  * truncate:
 111  * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 112  *   page lock
 113  * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
 114  *   i_data_sem (rw)
 115  *
 116  * direct IO:
 117  * sb_start_write -> i_mutex -> mmap_lock
 118  * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 119  *
 120  * writepages:
 121  * transaction start -> page lock(s) -> i_data_sem (rw)
 122  */
 123
 124 static const struct fs_context_operations ext4_context_ops = {
 125         .parse_param    = ext4_parse_param,
 126         .get_tree       = ext4_get_tree,
 127         .reconfigure    = ext4_reconfigure,
 128         .free           = ext4_fc_free,
 129 };
 130
 131
 132 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 133 static struct file_system_type ext2_fs_type = {
 134         .owner                  = THIS_MODULE,
 135         .name                   = "ext2",
 136         .init_fs_context        = ext4_init_fs_context,
 137         .parameters             = ext4_param_specs,
 138         .kill_sb                = kill_block_super,
 139         .fs_flags               = FS_REQUIRES_DEV,
 140 };
 141 MODULE_ALIAS_FS("ext2");
 142 MODULE_ALIAS("ext2");
 143 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 144 #else
 145 #define IS_EXT2_SB(sb) (0)
 146 #endif
 147
 148
 149 static struct file_system_type ext3_fs_type = {
 150         .owner                  = THIS_MODULE,
 151         .name                   = "ext3",
 152         .init_fs_context        = ext4_init_fs_context,
 153         .parameters             = ext4_param_specs,
 154         .kill_sb                = kill_block_super,
 155         .fs_flags               = FS_REQUIRES_DEV,
 156 };
 157 MODULE_ALIAS_FS("ext3");
 158 MODULE_ALIAS("ext3");
 159 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 160
 161
 162 static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
 163                                   bh_end_io_t *end_io)
 164 {
 165         /*
 166          * buffer's verified bit is no longer valid after reading from
 167          * disk again due to write out error, clear it to make sure we
 168          * recheck the buffer contents.
 169          */
 170         clear_buffer_verified(bh);
 171
 172         bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
 173         get_bh(bh);
 174         submit_bh(REQ_OP_READ | op_flags, bh);
 175 }
 176
 177 void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
 178                          bh_end_io_t *end_io)
 179 {
 180         BUG_ON(!buffer_locked(bh));
 181
 182         if (ext4_buffer_uptodate(bh)) {
 183                 unlock_buffer(bh);
 184                 return;
 185         }
 186         __ext4_read_bh(bh, op_flags, end_io);
 187 }
 188
 189 int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
 190 {
 191         BUG_ON(!buffer_locked(bh));
 192
 193         if (ext4_buffer_uptodate(bh)) {
 194                 unlock_buffer(bh);
 195                 return 0;
 196         }
 197
 198         __ext4_read_bh(bh, op_flags, end_io);
 199
 200         wait_on_buffer(bh);
 201         if (buffer_uptodate(bh))
 202                 return 0;
 203         return -EIO;
 204 }
 205
 206 int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
 207 {
 208         if (trylock_buffer(bh)) {
 209                 if (wait)
 210                         return ext4_read_bh(bh, op_flags, NULL);
 211                 ext4_read_bh_nowait(bh, op_flags, NULL);
 212                 return 0;
 213         }
 214         if (wait) {
 215                 wait_on_buffer(bh);
 216                 if (buffer_uptodate(bh))
 217                         return 0;
 218                 return -EIO;
 219         }
 220         return 0;
 221 }
 222
/*
 * This works like __bread_gfp() except it uses ERR_PTR for error
 * returns.  Currently with sb_bread it's impossible to distinguish
 * between ENOMEM and EIO situations (since both result in a NULL
 * return).
 */
 229 static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
 230                                                sector_t block,
 231                                                blk_opf_t op_flags, gfp_t gfp)
 232 {
 233         struct buffer_head *bh;
 234         int ret;
 235
 236         bh = sb_getblk_gfp(sb, block, gfp);
 237         if (bh == NULL)
 238                 return ERR_PTR(-ENOMEM);
 239         if (ext4_buffer_uptodate(bh))
 240                 return bh;
 241
 242         ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
 243         if (ret) {
 244                 put_bh(bh);
 245                 return ERR_PTR(ret);
 246         }
 247         return bh;
 248 }
 249
 250 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
 251                                    blk_opf_t op_flags)
 252 {
 253         return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
 254 }
 255
 256 struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
 257                                             sector_t block)
 258 {
 259         return __ext4_sb_bread_gfp(sb, block, 0, 0);
 260 }
 261
 262 void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
 263 {
 264         struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
 265
 266         if (likely(bh)) {
 267                 ext4_read_bh_lock(bh, REQ_RAHEAD, false);
 268                 brelse(bh);
 269         }
 270 }
 271
 272 static int ext4_verify_csum_type(struct super_block *sb,
 273                                  struct ext4_super_block *es)
 274 {
 275         if (!ext4_has_feature_metadata_csum(sb))
 276                 return 1;
 277
 278         return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 279 }
 280
 281 __le32 ext4_superblock_csum(struct super_block *sb,
 282                             struct ext4_super_block *es)
 283 {
 284         struct ext4_sb_info *sbi = EXT4_SB(sb);
 285         int offset = offsetof(struct ext4_super_block, s_checksum);
 286         __u32 csum;
 287
 288         csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 289
 290         return cpu_to_le32(csum);
 291 }
 292
 293 static int ext4_superblock_csum_verify(struct super_block *sb,
 294                                        struct ext4_super_block *es)
 295 {
 296         if (!ext4_has_metadata_csum(sb))
 297                 return 1;
 298
 299         return es->s_checksum == ext4_superblock_csum(sb, es);
 300 }
 301
 302 void ext4_superblock_csum_set(struct super_block *sb)
 303 {
 304         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 305
 306         if (!ext4_has_metadata_csum(sb))
 307                 return;
 308
 309         es->s_checksum = ext4_superblock_csum(sb, es);
 310 }
 311
 312 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 313                                struct ext4_group_desc *bg)
 314 {
 315         return le32_to_cpu(bg->bg_block_bitmap_lo) |
 316                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 317                  (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 318 }
 319
 320 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 321                                struct ext4_group_desc *bg)
 322 {
 323         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 324                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 325                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 326 }
 327
 328 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 329                               struct ext4_group_desc *bg)
 330 {
 331         return le32_to_cpu(bg->bg_inode_table_lo) |
 332                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 333                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 334 }
 335
 336 __u32 ext4_free_group_clusters(struct super_block *sb,
 337                                struct ext4_group_desc *bg)
 338 {
 339         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 340                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 341                  (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 342 }
 343
 344 __u32 ext4_free_inodes_count(struct super_block *sb,
 345                               struct ext4_group_desc *bg)
 346 {
 347         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 348                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 349                  (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 350 }
 351
 352 __u32 ext4_used_dirs_count(struct super_block *sb,
 353                               struct ext4_group_desc *bg)
 354 {
 355         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 356                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 357                  (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 358 }
 359
 360 __u32 ext4_itable_unused_count(struct super_block *sb,
 361                               struct ext4_group_desc *bg)
 362 {
 363         return le16_to_cpu(bg->bg_itable_unused_lo) |
 364                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 365                  (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 366 }
 367
 368 void ext4_block_bitmap_set(struct super_block *sb,
 369                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 370 {
 371         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 372         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 373                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 374 }
 375
 376 void ext4_inode_bitmap_set(struct super_block *sb,
 377                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 378 {
 379         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 380         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 381                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 382 }
 383
 384 void ext4_inode_table_set(struct super_block *sb,
 385                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 386 {
 387         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 388         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 389                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 390 }
 391
 392 void ext4_free_group_clusters_set(struct super_block *sb,
 393                                   struct ext4_group_desc *bg, __u32 count)
 394 {
 395         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 396         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 397                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 398 }
 399
 400 void ext4_free_inodes_set(struct super_block *sb,
 401                           struct ext4_group_desc *bg, __u32 count)
 402 {
 403         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 404         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 405                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 406 }
 407
 408 void ext4_used_dirs_set(struct super_block *sb,
 409                           struct ext4_group_desc *bg, __u32 count)
 410 {
 411         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 412         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 413                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 414 }
 415
 416 void ext4_itable_unused_set(struct super_block *sb,
 417                           struct ext4_group_desc *bg, __u32 count)
 418 {
 419         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 420         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 421                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 422 }
 423
 424 static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
 425 {
 426         now = clamp_val(now, 0, (1ull << 40) - 1);
 427
 428         *lo = cpu_to_le32(lower_32_bits(now));
 429         *hi = upper_32_bits(now);
 430 }
 431
 432 static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
 433 {
 434         return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
 435 }
 436 #define ext4_update_tstamp(es, tstamp) \
 437         __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
 438                              ktime_get_real_seconds())
 439 #define ext4_get_tstamp(es, tstamp) \
 440         __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 441
 442 /*
 443  * The del_gendisk() function uninitializes the disk-specific data
 444  * structures, including the bdi structure, without telling anyone
 445  * else.  Once this happens, any attempt to call mark_buffer_dirty()
 446  * (for example, by ext4_commit_super), will cause a kernel OOPS.
 447  * This is a kludge to prevent these oops until we can put in a proper
 448  * hook in del_gendisk() to inform the VFS and file system layers.
 449  */
 450 static int block_device_ejected(struct super_block *sb)
 451 {
 452         struct inode *bd_inode = sb->s_bdev->bd_inode;
 453         struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 454
 455         return bdi->dev == NULL;
 456 }
 457
 458 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 459 {
 460         struct super_block              *sb = journal->j_private;
 461         struct ext4_sb_info             *sbi = EXT4_SB(sb);
 462         int                             error = is_journal_aborted(journal);
 463         struct ext4_journal_cb_entry    *jce;
 464
 465         BUG_ON(txn->t_state == T_FINISHED);
 466
 467         ext4_process_freed_data(sb, txn->t_tid);
 468
 469         spin_lock(&sbi->s_md_lock);
 470         while (!list_empty(&txn->t_private_list)) {
 471                 jce = list_entry(txn->t_private_list.next,
 472                                  struct ext4_journal_cb_entry, jce_list);
 473                 list_del_init(&jce->jce_list);
 474                 spin_unlock(&sbi->s_md_lock);
 475                 jce->jce_func(sb, jce, error);
 476                 spin_lock(&sbi->s_md_lock);
 477         }
 478         spin_unlock(&sbi->s_md_lock);
 479 }
 480
 481 /*
 482  * This writepage callback for write_cache_pages()
 483  * takes care of a few cases after page cleaning.
 484  *
 485  * write_cache_pages() already checks for dirty pages
 486  * and calls clear_page_dirty_for_io(), which we want,
 487  * to write protect the pages.
 488  *
 489  * However, we may have to redirty a page (see below.)
 490  */
 491 static int ext4_journalled_writepage_callback(struct page *page,
 492                                               struct writeback_control *wbc,
 493                                               void *data)
 494 {
 495         transaction_t *transaction = (transaction_t *) data;
 496         struct buffer_head *bh, *head;
 497         struct journal_head *jh;
 498
 499         bh = head = page_buffers(page);
 500         do {
 501                 /*
 502                  * We have to redirty a page in these cases:
 503                  * 1) If buffer is dirty, it means the page was dirty because it
 504                  * contains a buffer that needs checkpointing. So the dirty bit
 505                  * needs to be preserved so that checkpointing writes the buffer
 506                  * properly.
 507                  * 2) If buffer is not part of the committing transaction
 508                  * (we may have just accidentally come across this buffer because
 509                  * inode range tracking is not exact) or if the currently running
 510                  * transaction already contains this buffer as well, dirty bit
 511                  * needs to be preserved so that the buffer gets writeprotected
 512                  * properly on running transaction's commit.
 513                  */
 514                 jh = bh2jh(bh);
 515                 if (buffer_dirty(bh) ||
 516                     (jh && (jh->b_transaction != transaction ||
 517                             jh->b_next_transaction))) {
 518                         redirty_page_for_writepage(wbc, page);
 519                         goto out;
 520                 }
 521         } while ((bh = bh->b_this_page) != head);
 522
 523 out:
 524         return AOP_WRITEPAGE_ACTIVATE;
 525 }
 526
 527 static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
 528 {
 529         struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
 530         struct writeback_control wbc = {
 531                 .sync_mode =  WB_SYNC_ALL,
 532                 .nr_to_write = LONG_MAX,
 533                 .range_start = jinode->i_dirty_start,
 534                 .range_end = jinode->i_dirty_end,
 535         };
 536
 537         return write_cache_pages(mapping, &wbc,
 538                                  ext4_journalled_writepage_callback,
 539                                  jinode->i_transaction);
 540 }
 541
 542 static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
 543 {
 544         int ret;
 545
 546         if (ext4_should_journal_data(jinode->i_vfs_inode))
 547                 ret = ext4_journalled_submit_inode_data_buffers(jinode);
 548         else
 549                 ret = jbd2_journal_submit_inode_data_buffers(jinode);
 550
 551         return ret;
 552 }
 553
 554 static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
 555 {
 556         int ret = 0;
 557
 558         if (!ext4_should_journal_data(jinode->i_vfs_inode))
 559                 ret = jbd2_journal_finish_inode_data_buffers(jinode);
 560
 561         return ret;
 562 }
 563
 564 static bool system_going_down(void)
 565 {
 566         return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 567                 || system_state == SYSTEM_RESTART;
 568 }
 569
 570 struct ext4_err_translation {
 571         int code;
 572         int errno;
 573 };
 574
 575 #define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
 576
 577 static struct ext4_err_translation err_translation[] = {
 578         EXT4_ERR_TRANSLATE(EIO),
 579         EXT4_ERR_TRANSLATE(ENOMEM),
 580         EXT4_ERR_TRANSLATE(EFSBADCRC),
 581         EXT4_ERR_TRANSLATE(EFSCORRUPTED),
 582         EXT4_ERR_TRANSLATE(ENOSPC),
 583         EXT4_ERR_TRANSLATE(ENOKEY),
 584         EXT4_ERR_TRANSLATE(EROFS),
 585         EXT4_ERR_TRANSLATE(EFBIG),
 586         EXT4_ERR_TRANSLATE(EEXIST),
 587         EXT4_ERR_TRANSLATE(ERANGE),
 588         EXT4_ERR_TRANSLATE(EOVERFLOW),
 589         EXT4_ERR_TRANSLATE(EBUSY),
 590         EXT4_ERR_TRANSLATE(ENOTDIR),
 591         EXT4_ERR_TRANSLATE(ENOTEMPTY),
 592         EXT4_ERR_TRANSLATE(ESHUTDOWN),
 593         EXT4_ERR_TRANSLATE(EFAULT),
 594 };
 595
 596 static int ext4_errno_to_code(int errno)
 597 {
 598         int i;
 599
 600         for (i = 0; i < ARRAY_SIZE(err_translation); i++)
 601                 if (err_translation[i].errno == errno)
 602                         return err_translation[i].code;
 603         return EXT4_ERR_UNKNOWN;
 604 }
 605
 606 static void save_error_info(struct super_block *sb, int error,
 607                             __u32 ino, __u64 block,
 608                             const char *func, unsigned int line)
 609 {
 610         struct ext4_sb_info *sbi = EXT4_SB(sb);
 611
 612         /* We default to EFSCORRUPTED error... */
 613         if (error == 0)
 614                 error = EFSCORRUPTED;
 615
 616         spin_lock(&sbi->s_error_lock);
 617         sbi->s_add_error_count++;
 618         sbi->s_last_error_code = error;
 619         sbi->s_last_error_line = line;
 620         sbi->s_last_error_ino = ino;
 621         sbi->s_last_error_block = block;
 622         sbi->s_last_error_func = func;
 623         sbi->s_last_error_time = ktime_get_real_seconds();
 624         if (!sbi->s_first_error_time) {
 625                 sbi->s_first_error_code = error;
 626                 sbi->s_first_error_line = line;
 627                 sbi->s_first_error_ino = ino;
 628                 sbi->s_first_error_block = block;
 629                 sbi->s_first_error_func = func;
 630                 sbi->s_first_error_time = sbi->s_last_error_time;
 631         }
 632         spin_unlock(&sbi->s_error_lock);
 633 }
 634
 635 /* Deal with the reporting of failure conditions on a filesystem such as
 636  * inconsistencies detected or read IO failures.
 637  *
 638  * On ext2, we can store the error state of the filesystem in the
 639  * superblock.  That is not possible on ext4, because we may have other
 640  * write ordering constraints on the superblock which prevent us from
 641  * writing it out straight away; and given that the journal is about to
 642  * be aborted, we can't rely on the current, or future, transactions to
 643  * write out the superblock safely.
 644  *
 645  * We'll just use the jbd2_journal_abort() error code to record an error in
 646  * the journal instead.  On recovery, the journal will complain about
 647  * that error until we've noted it down and cleared it.
 648  *
 649  * If force_ro is set, we unconditionally force the filesystem into an
 650  * ABORT|READONLY state, unless the error response on the fs has been set to
 651  * panic in which case we take the easy way out and panic immediately. This is
 652  * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
 653  * at a critical moment in log management.
 654  */
 655 static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
 656                               __u32 ino, __u64 block,
 657                               const char *func, unsigned int line)
 658 {
 659         journal_t *journal = EXT4_SB(sb)->s_journal;
 660         bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
 661
 662         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 663         if (test_opt(sb, WARN_ON_ERROR))
 664                 WARN_ON_ONCE(1);
 665
 666         if (!continue_fs && !sb_rdonly(sb)) {
 667                 ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
 668                 if (journal)
 669                         jbd2_journal_abort(journal, -EIO);
 670         }
 671
 672         if (!bdev_read_only(sb->s_bdev)) {
 673                 save_error_info(sb, error, ino, block, func, line);
 674                 /*
 675                  * In case the fs should keep running, we need to writeout
 676                  * superblock through the journal. Due to lock ordering
 677                  * constraints, it may not be safe to do it right here so we
 678                  * defer superblock flushing to a workqueue.
 679                  */
 680                 if (continue_fs && journal)
 681                         schedule_work(&EXT4_SB(sb)->s_error_work);
 682                 else
 683                         ext4_commit_super(sb);
 684         }
 685
 686         /*
 687          * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 688          * could panic during 'reboot -f' as the underlying device got already
 689          * disabled.
 690          */
 691         if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
 692                 panic("EXT4-fs (device %s): panic forced after error\n",
 693                         sb->s_id);
 694         }
 695
 696         if (sb_rdonly(sb) || continue_fs)
 697                 return;
 698
 699         ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 700         /*
 701          * Make sure updated value of ->s_mount_flags will be visible before
 702          * ->s_flags update
 703          */
 704         smp_wmb();
 705         sb->s_flags |= SB_RDONLY;
 706 }
 707
 708 static void flush_stashed_error_work(struct work_struct *work)
 709 {
 710         struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
 711                                                 s_error_work);
 712         journal_t *journal = sbi->s_journal;
 713         handle_t *handle;
 714
 715         /*
 716          * If the journal is still running, we have to write out superblock
 717          * through the journal to avoid collisions of other journalled sb
 718          * updates.
 719          *
 720          * We use directly jbd2 functions here to avoid recursing back into
 721          * ext4 error handling code during handling of previous errors.
 722          */
 723         if (!sb_rdonly(sbi->s_sb) && journal) {
 724                 struct buffer_head *sbh = sbi->s_sbh;
 725                 handle = jbd2_journal_start(journal, 1);
 726                 if (IS_ERR(handle))
 727                         goto write_directly;
 728                 if (jbd2_journal_get_write_access(handle, sbh)) {
 729                         jbd2_journal_stop(handle);
 730                         goto write_directly;
 731                 }
 732                 ext4_update_super(sbi->s_sb);
 733                 if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
 734                         ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
 735                                  "superblock detected");
 736                         clear_buffer_write_io_error(sbh);
 737                         set_buffer_uptodate(sbh);
 738                 }
 739
 740                 if (jbd2_journal_dirty_metadata(handle, sbh)) {
 741                         jbd2_journal_stop(handle);
 742                         goto write_directly;
 743                 }
 744                 jbd2_journal_stop(handle);
 745                 ext4_notify_error_sysfs(sbi);
 746                 return;
 747         }
 748 write_directly:
 749         /*
 750          * Write through journal failed. Write sb directly to get error info
 751          * out and hope for the best.
 752          */
 753         ext4_commit_super(sbi->s_sb);
 754         ext4_notify_error_sysfs(sbi);
 755 }
 756
 757 #define ext4_error_ratelimit(sb)                                        \
 758                 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 759                              "EXT4-fs error")
 760
 761 void __ext4_error(struct super_block *sb, const char *function,
 762                   unsigned int line, bool force_ro, int error, __u64 block,
 763                   const char *fmt, ...)
 764 {
 765         struct va_format vaf;
 766         va_list args;
 767
 768         if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 769                 return;
 770
 771         trace_ext4_error(sb, function, line);
 772         if (ext4_error_ratelimit(sb)) {
 773                 va_start(args, fmt);
 774                 vaf.fmt = fmt;
 775                 vaf.va = &args;
 776                 printk(KERN_CRIT
 777                        "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 778                        sb->s_id, function, line, current->comm, &vaf);
 779                 va_end(args);
 780         }
 781         fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
 782
 783         ext4_handle_error(sb, force_ro, error, 0, block, function, line);
 784 }
 785
 786 void __ext4_error_inode(struct inode *inode, const char *function,
 787                         unsigned int line, ext4_fsblk_t block, int error,
 788                         const char *fmt, ...)
 789 {
 790         va_list args;
 791         struct va_format vaf;
 792
 793         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 794                 return;
 795
 796         trace_ext4_error(inode->i_sb, function, line);
 797         if (ext4_error_ratelimit(inode->i_sb)) {
 798                 va_start(args, fmt);
 799                 vaf.fmt = fmt;
 800                 vaf.va = &args;
 801                 if (block)
 802                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 803                                "inode #%lu: block %llu: comm %s: %pV\n",
 804                                inode->i_sb->s_id, function, line, inode->i_ino,
 805                                block, current->comm, &vaf);
 806                 else
 807                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 808                                "inode #%lu: comm %s: %pV\n",
 809                                inode->i_sb->s_id, function, line, inode->i_ino,
 810                                current->comm, &vaf);
 811                 va_end(args);
 812         }
 813         fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
 814
 815         ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
 816                           function, line);
 817 }
 818
 819 void __ext4_error_file(struct file *file, const char *function,
 820                        unsigned int line, ext4_fsblk_t block,
 821                        const char *fmt, ...)
 822 {
 823         va_list args;
 824         struct va_format vaf;
 825         struct inode *inode = file_inode(file);
 826         char pathname[80], *path;
 827
 828         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 829                 return;
 830
 831         trace_ext4_error(inode->i_sb, function, line);
 832         if (ext4_error_ratelimit(inode->i_sb)) {
 833                 path = file_path(file, pathname, sizeof(pathname));
 834                 if (IS_ERR(path))
 835                         path = "(unknown)";
 836                 va_start(args, fmt);
 837                 vaf.fmt = fmt;
 838                 vaf.va = &args;
 839                 if (block)
 840                         printk(KERN_CRIT
 841                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 842                                "block %llu: comm %s: path %s: %pV\n",
 843                                inode->i_sb->s_id, function, line, inode->i_ino,
 844                                block, current->comm, path, &vaf);
 845                 else
 846                         printk(KERN_CRIT
 847                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 848                                "comm %s: path %s: %pV\n",
 849                                inode->i_sb->s_id, function, line, inode->i_ino,
 850                                current->comm, path, &vaf);
 851                 va_end(args);
 852         }
 853         fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
 854
 855         ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
 856                           function, line);
 857 }
 858
 859 const char *ext4_decode_error(struct super_block *sb, int errno,
 860                               char nbuf[16])
 861 {
 862         char *errstr = NULL;
 863
 864         switch (errno) {
 865         case -EFSCORRUPTED:
 866                 errstr = "Corrupt filesystem";
 867                 break;
 868         case -EFSBADCRC:
 869                 errstr = "Filesystem failed CRC";
 870                 break;
 871         case -EIO:
 872                 errstr = "IO failure";
 873                 break;
 874         case -ENOMEM:
 875                 errstr = "Out of memory";
 876                 break;
 877         case -EROFS:
 878                 if (!sb || (EXT4_SB(sb)->s_journal &&
 879                             EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 880                         errstr = "Journal has aborted";
 881                 else
 882                         errstr = "Readonly filesystem";
 883                 break;
 884         default:
 885                 /* If the caller passed in an extra buffer for unknown
 886                  * errors, textualise them now.  Else we just return
 887                  * NULL. */
 888                 if (nbuf) {
 889                         /* Check for truncated error codes... */
 890                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 891                                 errstr = nbuf;
 892                 }
 893                 break;
 894         }
 895
 896         return errstr;
 897 }
 898
 899 /* __ext4_std_error decodes expected errors from journaling functions
 900  * automatically and invokes the appropriate error response.  */
 901
 902 void __ext4_std_error(struct super_block *sb, const char *function,
 903                       unsigned int line, int errno)
 904 {
 905         char nbuf[16];
 906         const char *errstr;
 907
 908         if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 909                 return;
 910
 911         /* Special case: if the error is EROFS, and we're not already
 912          * inside a transaction, then there's really no point in logging
 913          * an error. */
 914         if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
 915                 return;
 916
 917         if (ext4_error_ratelimit(sb)) {
 918                 errstr = ext4_decode_error(sb, errno, nbuf);
 919                 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 920                        sb->s_id, function, line, errstr);
 921         }
 922         fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
 923
 924         ext4_handle_error(sb, false, -errno, 0, 0, function, line);
 925 }
 926
 927 void __ext4_msg(struct super_block *sb,
 928                 const char *prefix, const char *fmt, ...)
 929 {
 930         struct va_format vaf;
 931         va_list args;
 932
 933         if (sb) {
 934                 atomic_inc(&EXT4_SB(sb)->s_msg_count);
 935                 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
 936                                   "EXT4-fs"))
 937                         return;
 938         }
 939
 940         va_start(args, fmt);
 941         vaf.fmt = fmt;
 942         vaf.va = &args;
 943         if (sb)
 944                 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
 945         else
 946                 printk("%sEXT4-fs: %pV\n", prefix, &vaf);
 947         va_end(args);
 948 }
 949
 950 static int ext4_warning_ratelimit(struct super_block *sb)
 951 {
 952         atomic_inc(&EXT4_SB(sb)->s_warning_count);
 953         return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
 954                             "EXT4-fs warning");
 955 }
 956
 957 void __ext4_warning(struct super_block *sb, const char *function,
 958                     unsigned int line, const char *fmt, ...)
 959 {
 960         struct va_format vaf;
 961         va_list args;
 962
 963         if (!ext4_warning_ratelimit(sb))
 964                 return;
 965
 966         va_start(args, fmt);
 967         vaf.fmt = fmt;
 968         vaf.va = &args;
 969         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
 970                sb->s_id, function, line, &vaf);
 971         va_end(args);
 972 }
 973
 974 void __ext4_warning_inode(const struct inode *inode, const char *function,
 975                           unsigned int line, const char *fmt, ...)
 976 {
 977         struct va_format vaf;
 978         va_list args;
 979
 980         if (!ext4_warning_ratelimit(inode->i_sb))
 981                 return;
 982
 983         va_start(args, fmt);
 984         vaf.fmt = fmt;
 985         vaf.va = &args;
 986         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
 987                "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
 988                function, line, inode->i_ino, current->comm, &vaf);
 989         va_end(args);
 990 }
 991
 992 void __ext4_grp_locked_error(const char *function, unsigned int line,
 993                              struct super_block *sb, ext4_group_t grp,
 994                              unsigned long ino, ext4_fsblk_t block,
 995                              const char *fmt, ...)
 996 __releases(bitlock)
 997 __acquires(bitlock)
 998 {
 999         struct va_format vaf;
1000         va_list args;
1001
1002         if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
1003                 return;
1004
1005         trace_ext4_error(sb, function, line);
1006         if (ext4_error_ratelimit(sb)) {
1007                 va_start(args, fmt);
1008                 vaf.fmt = fmt;
1009                 vaf.va = &args;
1010                 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
1011                        sb->s_id, function, line, grp);
1012                 if (ino)
1013                         printk(KERN_CONT "inode %lu: ", ino);
1014                 if (block)
1015                         printk(KERN_CONT "block %llu:",
1016                                (unsigned long long) block);
1017                 printk(KERN_CONT "%pV\n", &vaf);
1018                 va_end(args);
1019         }
1020
1021         if (test_opt(sb, ERRORS_CONT)) {
1022                 if (test_opt(sb, WARN_ON_ERROR))
1023                         WARN_ON_ONCE(1);
1024                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
1025                 if (!bdev_read_only(sb->s_bdev)) {
1026                         save_error_info(sb, EFSCORRUPTED, ino, block, function,
1027                                         line);
1028                         schedule_work(&EXT4_SB(sb)->s_error_work);
1029                 }
1030                 return;
1031         }
1032         ext4_unlock_group(sb, grp);
1033         ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
1034         /*
1035          * We only get here in the ERRORS_RO case; relocking the group
1036          * may be dangerous, but nothing bad will happen since the
1037          * filesystem will have already been marked read/only and the
1038          * journal has been aborted.  We return 1 as a hint to callers
1039          * who might what to use the return value from
1040          * ext4_grp_locked_error() to distinguish between the
1041          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
1042          * aggressively from the ext4 function in question, with a
1043          * more appropriate error code.
1044          */
1045         ext4_lock_group(sb, grp);
1046         return;
1047 }
1048
1049 void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
1050                                      ext4_group_t group,
1051                                      unsigned int flags)
1052 {
1053         struct ext4_sb_info *sbi = EXT4_SB(sb);
1054         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1055         struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
1056         int ret;
1057
1058         if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
1059                 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1060                                             &grp->bb_state);
1061                 if (!ret)
1062                         percpu_counter_sub(&sbi->s_freeclusters_counter,
1063                                            grp->bb_free);
1064         }
1065
1066         if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
1067                 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
1068                                             &grp->bb_state);
1069                 if (!ret && gdp) {
1070                         int count;
1071
1072                         count = ext4_free_inodes_count(sb, gdp);
1073                         percpu_counter_sub(&sbi->s_freeinodes_counter,
1074                                            count);
1075                 }
1076         }
1077 }
1078
1079 void ext4_update_dynamic_rev(struct super_block *sb)
1080 {
1081         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1082
1083         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
1084                 return;
1085
1086         ext4_warning(sb,
1087                      "updating to rev %d because of new feature flag, "
1088                      "running e2fsck is recommended",
1089                      EXT4_DYNAMIC_REV);
1090
1091         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1092         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1093         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
1094         /* leave es->s_feature_*compat flags alone */
1095         /* es->s_uuid will be set by e2fsck if empty */
1096
1097         /*
1098          * The rest of the superblock fields should be zero, and if not it
1099          * means they are likely already in use, so leave them alone.  We
1100          * can leave it up to e2fsck to clean up any inconsistencies there.
1101          */
1102 }
1103
1104 /*
1105  * Open the external journal device
1106  */
1107 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
1108 {
1109         struct block_device *bdev;
1110
1111         bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
1112         if (IS_ERR(bdev))
1113                 goto fail;
1114         return bdev;
1115
1116 fail:
1117         ext4_msg(sb, KERN_ERR,
1118                  "failed to open journal device unknown-block(%u,%u) %ld",
1119                  MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
1120         return NULL;
1121 }
1122
1123 /*
1124  * Release the journal device
1125  */
1126 static void ext4_blkdev_put(struct block_device *bdev)
1127 {
1128         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1129 }
1130
1131 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
1132 {
1133         struct block_device *bdev;
1134         bdev = sbi->s_journal_bdev;
1135         if (bdev) {
1136                 ext4_blkdev_put(bdev);
1137                 sbi->s_journal_bdev = NULL;
1138         }
1139 }
1140
1141 static inline struct inode *orphan_list_entry(struct list_head *l)
1142 {
1143         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
1144 }
1145
1146 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
1147 {
1148         struct list_head *l;
1149
1150         ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1151                  le32_to_cpu(sbi->s_es->s_last_orphan));
1152
1153         printk(KERN_ERR "sb_info orphan list:\n");
1154         list_for_each(l, &sbi->s_orphan) {
1155                 struct inode *inode = orphan_list_entry(l);
1156                 printk(KERN_ERR "  "
1157                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1158                        inode->i_sb->s_id, inode->i_ino, inode,
1159                        inode->i_mode, inode->i_nlink,
1160                        NEXT_ORPHAN(inode));
1161         }
1162 }
1163
1164 #ifdef CONFIG_QUOTA
1165 static int ext4_quota_off(struct super_block *sb, int type);
1166
1167 static inline void ext4_quota_off_umount(struct super_block *sb)
1168 {
1169         int type;
1170
1171         /* Use our quota_off function to clear inode flags etc. */
1172         for (type = 0; type < EXT4_MAXQUOTAS; type++)
1173                 ext4_quota_off(sb, type);
1174 }
1175
1176 /*
1177  * This is a helper function which is used in the mount/remount
1178  * codepaths (which holds s_umount) to fetch the quota file name.
1179  */
1180 static inline char *get_qf_name(struct super_block *sb,
1181                                 struct ext4_sb_info *sbi,
1182                                 int type)
1183 {
1184         return rcu_dereference_protected(sbi->s_qf_names[type],
1185                                          lockdep_is_held(&sb->s_umount));
1186 }
1187 #else
/* Quota support is compiled out (!CONFIG_QUOTA): nothing to turn off. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
}
1191 #endif
1192
/*
 * ->put_super handler (see ext4_sops): tear down everything hanging off the
 * superblock's ext4_sb_info and finally free the ext4_sb_info itself.
 *
 * The order of the steps below matters; the inline comments explain the
 * constraints that are known.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        struct buffer_head **group_desc;
        struct flex_groups **flex_groups;
        int aborted = 0;
        int i, err;

        /*
         * Unregister sysfs before destroying jbd2 journal.
         * Since we could still access attr_journal_task attribute via sysfs
         * path which could have sbi->s_journal->j_task as NULL
         * Unregister sysfs before flush sbi->s_error_work.
         * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
         * read metadata verify failed then will queue error work.
         * flush_stashed_error_work will call start_this_handle may trigger
         * BUG_ON.
         */
        ext4_unregister_sysfs(sb);

        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
                ext4_msg(sb, KERN_INFO, "unmounting filesystem.");

        ext4_unregister_li_request(sb);
        ext4_quota_off_umount(sb);

        /* Let any queued error work finish before the journal goes away. */
        flush_work(&sbi->s_error_work);
        destroy_workqueue(sbi->rsv_conversion_wq);
        ext4_release_orphan_info(sb);

        if (sbi->s_journal) {
                /* Sample the abort state first: the journal is gone afterwards. */
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
                /* A destroy failure on a healthy journal is a new error. */
                if ((err < 0) && !aborted) {
                        ext4_abort(sb, -err, "Couldn't clean up the journal");
                }
        }

        ext4_es_unregister_shrinker(sbi);
        del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);

        /*
         * On a writable filesystem whose journal was not aborted, clear the
         * needs_recovery and orphan_present feature flags and copy the
         * in-memory mount state into the on-disk superblock image.
         */
        if (!sb_rdonly(sb) && !aborted) {
                ext4_clear_feature_journal_needs_recovery(sb);
                ext4_clear_feature_orphan_present(sb);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        /* The superblock is written whenever the fs is writable, aborted or not. */
        if (!sb_rdonly(sb))
                ext4_commit_super(sb);

        /* Release the group descriptor buffers and the flex group arrays. */
        rcu_read_lock();
        group_desc = rcu_dereference(sbi->s_group_desc);
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(group_desc[i]);
        kvfree(group_desc);
        flex_groups = rcu_dereference(sbi->s_flex_groups);
        if (flex_groups) {
                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
                        kvfree(flex_groups[i]);
                kvfree(flex_groups);
        }
        rcu_read_unlock();
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
        percpu_free_rwsem(&sbi->s_writepages_rwsem);
#ifdef CONFIG_QUOTA
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(get_qf_name(sb, sbi, i));
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->s_journal_bdev);
                invalidate_bdev(sbi->s_journal_bdev);
                ext4_blkdev_remove(sbi);
        }

        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
        sbi->s_ea_inode_cache = NULL;

        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
        sbi->s_ea_block_cache = NULL;

        ext4_stop_mmpd(sbi);

        brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        /* Wait for the kobject release to complete before sbi is freed. */
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev, NULL);
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
        utf8_unload(sb->s_encoding);
#endif
        kfree(sbi);
}
1317
1318 static struct kmem_cache *ext4_inode_cachep;
1319
/*
 * ->alloc_inode handler (see ext4_sops): allocate an ext4_inode_info from
 * ext4_inode_cachep and initialise its per-use fields.  Returns the embedded
 * VFS inode, or NULL if the allocation fails.
 *
 * Fields set up by the slab constructor init_once() (i_orphan, xattr_sem,
 * i_data_sem and the VFS inode's one-time state) are not initialised here.
 *
 * Called inside transaction, so use GFP_NOFS
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
        struct ext4_inode_info *ei;

        ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;

        inode_set_iversion(&ei->vfs_inode, 1);
        spin_lock_init(&ei->i_raw_lock);
        /* Preallocation bookkeeping */
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        atomic_set(&ei->i_prealloc_active, 0);
        spin_lock_init(&ei->i_prealloc_lock);
        /* Extent status tree and its counters */
        ext4_es_init_tree(&ei->i_es_tree);
        rwlock_init(&ei->i_es_lock);
        INIT_LIST_HEAD(&ei->i_es_list);
        ei->i_es_all_nr = 0;
        ei->i_es_shk_nr = 0;
        ei->i_es_shrink_lblk = 0;
        ei->i_reserved_data_blocks = 0;
        spin_lock_init(&(ei->i_block_reservation_lock));
        ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
        memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
        /* No jbd2 inode attached yet; ext4_clear_inode() frees it if set. */
        ei->jinode = NULL;
        INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
        spin_lock_init(&ei->i_completed_io_lock);
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
        atomic_set(&ei->i_unwritten, 0);
        INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
        ext4_fc_init_inode(&ei->vfs_inode);
        mutex_init(&ei->i_fc_lock);
        return &ei->vfs_inode;
}
1360
/*
 * ->drop_inode handler: drop the inode if either the generic policy or
 * fscrypt asks for it.  fscrypt is consulted only when the generic policy
 * would keep the inode.  The decision is traced.
 */
static int ext4_drop_inode(struct inode *inode)
{
        int drop;

        drop = generic_drop_inode(inode);
        if (drop == 0)
                drop = fscrypt_drop_inode(inode);

        trace_ext4_drop_inode(inode, drop);
        return drop;
}
1371
1372 static void ext4_free_in_core_inode(struct inode *inode)
1373 {
1374         fscrypt_free_inode(inode);
1375         if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1376                 pr_warn("%s: inode %ld still in fc list",
1377                         __func__, inode->i_ino);
1378         }
1379         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1380 }
1381
1382 static void ext4_destroy_inode(struct inode *inode)
1383 {
1384         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1385                 ext4_msg(inode->i_sb, KERN_ERR,
1386                          "Inode %lu (%p): orphan list check failed!",
1387                          inode->i_ino, EXT4_I(inode));
1388                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1389                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
1390                                 true);
1391                 dump_stack();
1392         }
1393
1394         if (EXT4_I(inode)->i_reserved_data_blocks)
1395                 ext4_msg(inode->i_sb, KERN_ERR,
1396                          "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1397                          inode->i_ino, EXT4_I(inode),
1398                          EXT4_I(inode)->i_reserved_data_blocks);
1399 }
1400
/*
 * Constructor for ext4_inode_cachep objects (passed to
 * kmem_cache_create_usercopy() in init_inodecache()).  @foo points at the
 * ext4_inode_info being constructed.  Sets up the orphan list head, the two
 * rw-semaphores, the VFS inode's one-time state and the ext4_fc_* inode
 * state.
 */
static void init_once(void *foo)
{
        struct ext4_inode_info *ei = foo;

        INIT_LIST_HEAD(&ei->i_orphan);
        init_rwsem(&ei->xattr_sem);
        init_rwsem(&ei->i_data_sem);
        inode_init_once(&ei->vfs_inode);
        ext4_fc_init_inode(&ei->vfs_inode);
}
1411
1412 static int __init init_inodecache(void)
1413 {
1414         ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1415                                 sizeof(struct ext4_inode_info), 0,
1416                                 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1417                                         SLAB_ACCOUNT),
1418                                 offsetof(struct ext4_inode_info, i_data),
1419                                 sizeof_field(struct ext4_inode_info, i_data),
1420                                 init_once);
1421         if (ext4_inode_cachep == NULL)
1422                 return -ENOMEM;
1423         return 0;
1424 }
1425
/* Destroy ext4_inode_cachep, the slab cache created by init_inodecache(). */
static void destroy_inodecache(void)
{
        /*
         * Make sure all delayed rcu free inodes are flushed before we
         * destroy cache.
         */
        rcu_barrier();
        kmem_cache_destroy(ext4_inode_cachep);
}
1435
1436 void ext4_clear_inode(struct inode *inode)
1437 {
1438         ext4_fc_del(inode);
1439         invalidate_inode_buffers(inode);
1440         clear_inode(inode);
1441         ext4_discard_preallocations(inode, 0);
1442         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1443         dquot_drop(inode);
1444         if (EXT4_I(inode)->jinode) {
1445                 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1446                                                EXT4_I(inode)->jinode);
1447                 jbd2_free_inode(EXT4_I(inode)->jinode);
1448                 EXT4_I(inode)->jinode = NULL;
1449         }
1450         fscrypt_put_encryption_info(inode);
1451         fsverity_cleanup_inode(inode);
1452 }
1453
1454 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1455                                         u64 ino, u32 generation)
1456 {
1457         struct inode *inode;
1458
1459         /*
1460          * Currently we don't know the generation for parent directory, so
1461          * a generation of 0 means "accept any"
1462          */
1463         inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1464         if (IS_ERR(inode))
1465                 return ERR_CAST(inode);
1466         if (generation && inode->i_generation != generation) {
1467                 iput(inode);
1468                 return ERR_PTR(-ESTALE);
1469         }
1470
1471         return inode;
1472 }
1473
1474 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1475                                         int fh_len, int fh_type)
1476 {
1477         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1478                                     ext4_nfs_get_inode);
1479 }
1480
1481 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1482                                         int fh_len, int fh_type)
1483 {
1484         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1485                                     ext4_nfs_get_inode);
1486 }
1487
1488 static int ext4_nfs_commit_metadata(struct inode *inode)
1489 {
1490         struct writeback_control wbc = {
1491                 .sync_mode = WB_SYNC_ALL
1492         };
1493
1494         trace_ext4_nfs_commit_metadata(inode);
1495         return ext4_write_inode(inode, &wbc);
1496 }
1497
1498 #ifdef CONFIG_QUOTA
/* Quota type names, initialised from INITQFNAMES and indexed by quota type. */
static const char * const quotatypes[] = INITQFNAMES;
/* Name of quota type t; the index is not bounds-checked. */
#define QTYPE2NAME(t) (quotatypes[t])
1501
1502 static int ext4_write_dquot(struct dquot *dquot);
1503 static int ext4_acquire_dquot(struct dquot *dquot);
1504 static int ext4_release_dquot(struct dquot *dquot);
1505 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1506 static int ext4_write_info(struct super_block *sb, int type);
1507 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1508                          const struct path *path);
1509 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1510                                size_t len, loff_t off);
1511 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1512                                 const char *data, size_t len, loff_t off);
1513 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1514                              unsigned int flags);
1515
1516 static struct dquot **ext4_get_dquots(struct inode *inode)
1517 {
1518         return EXT4_I(inode)->i_dquot;
1519 }
1520
/*
 * dquot operations for ext4: ext4-specific handlers where one exists, the
 * generic dquot_* helpers for allocation, destruction and id iteration.
 */
static const struct dquot_operations ext4_quota_operations = {
        .get_reserved_space     = ext4_get_reserved_space,
        .write_dquot            = ext4_write_dquot,
        .acquire_dquot          = ext4_acquire_dquot,
        .release_dquot          = ext4_release_dquot,
        .mark_dirty             = ext4_mark_dquot_dirty,
        .write_info             = ext4_write_info,
        .alloc_dquot            = dquot_alloc,
        .destroy_dquot          = dquot_destroy,
        .get_projid             = ext4_get_projid,
        .get_inode_usage        = ext4_get_inode_usage,
        .get_next_id            = dquot_get_next_id,
};
1534
/*
 * quotactl operations for ext4: quota_on / quota_off are ext4-specific, all
 * other entries are the generic dquot_* implementations.
 */
static const struct quotactl_ops ext4_qctl_operations = {
        .quota_on       = ext4_quota_on,
        .quota_off      = ext4_quota_off,
        .quota_sync     = dquot_quota_sync,
        .get_state      = dquot_get_state,
        .set_info       = dquot_set_dqinfo,
        .get_dqblk      = dquot_get_dqblk,
        .set_dqblk      = dquot_set_dqblk,
        .get_nextdqblk  = dquot_get_next_dqblk,
};
1545 #endif
1546
/*
 * Superblock operations for ext4.  The quota read/write/get_dquots hooks
 * are only present when CONFIG_QUOTA is enabled.
 */
static const struct super_operations ext4_sops = {
        .alloc_inode    = ext4_alloc_inode,
        .free_inode     = ext4_free_in_core_inode,
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
        .freeze_fs      = ext4_freeze,
        .unfreeze_fs    = ext4_unfreeze,
        .statfs         = ext4_statfs,
        .show_options   = ext4_show_options,
#ifdef CONFIG_QUOTA
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
        .get_dquots     = ext4_get_dquots,
#endif
};
1567
/* Export (file handle) operations for ext4. */
static const struct export_operations ext4_export_ops = {
        .fh_to_dentry = ext4_fh_to_dentry,
        .fh_to_parent = ext4_fh_to_parent,
        .get_parent = ext4_get_parent,
        .commit_metadata = ext4_nfs_commit_metadata,
};
1574
/*
 * Identifiers for the mount options (presumably consumed by the fs_parser
 * tables later in this file -- confirm).  The two Opt_fc_debug_* entries
 * exist only when CONFIG_EXT4_DEBUG is set.
 */
enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb,
        Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_acl,
        Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
        Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
        Opt_inlinecrypt,
        Opt_usrjquota, Opt_grpjquota, Opt_quota,
        Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
        Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
        Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
        Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
        Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
        Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
        Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};
1603
/*
 * Accepted values of the "errors" mount option, mapped to the
 * EXT4_MOUNT_ERRORS_* mount option bits. Terminated by an empty entry.
 */
1604 static const struct constant_table ext4_param_errors[] = {
1605         {"continue",    EXT4_MOUNT_ERRORS_CONT},
1606         {"panic",       EXT4_MOUNT_ERRORS_PANIC},
1607         {"remount-ro",  EXT4_MOUNT_ERRORS_RO},
1608         {}
1609 };
1610
/*
 * Accepted values of the "data" mount option, mapped to the
 * EXT4_MOUNT_*_DATA journalling mode bits. Terminated by an empty entry.
 */
1611 static const struct constant_table ext4_param_data[] = {
1612         {"journal",     EXT4_MOUNT_JOURNAL_DATA},
1613         {"ordered",     EXT4_MOUNT_ORDERED_DATA},
1614         {"writeback",   EXT4_MOUNT_WRITEBACK_DATA},
1615         {}
1616 };
1617
/*
 * Accepted values of the "data_err" mount option. The values are option
 * tokens (Opt_data_err_*), not mount option bits. Terminated by an empty
 * entry.
 */
1618 static const struct constant_table ext4_param_data_err[] = {
1619         {"abort",       Opt_data_err_abort},
1620         {"ignore",      Opt_data_err_ignore},
1621         {}
1622 };
1623
/*
 * Accepted values of the "jqfmt" mount option, mapped to the QFMT_VFS_*
 * quota format identifiers. Terminated by an empty entry.
 */
1624 static const struct constant_table ext4_param_jqfmt[] = {
1625         {"vfsold",      QFMT_VFS_OLD},
1626         {"vfsv0",       QFMT_VFS_V0},
1627         {"vfsv1",       QFMT_VFS_V1},
1628         {}
1629 };
1630
/*
 * Accepted values of the valued form of the "dax" mount option. The values
 * are option tokens (Opt_dax_*). Terminated by an empty entry.
 */
1631 static const struct constant_table ext4_param_dax[] = {
1632         {"always",      Opt_dax_always},
1633         {"inode",       Opt_dax_inode},
1634         {"never",       Opt_dax_never},
1635         {}
1636 };
1637
1638 /*
      * String parameter that allows empty argument: built on __fsparam() with
      * fs_param_can_be_empty. Used for "usrjquota" and "grpjquota", where
      * ext4_parse_param() treats an empty value as a request to clear the name.
      */
1639 #define fsparam_string_empty(NAME, OPT) \
1640         __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
1641
1642 /*
1643  * Mount option specification
1644  * We don't use fsparam_flag_no because of the way we set the
1645  * options and the way we show them in _ext4_show_options(). To
1646  * keep the changes to a minimum, let's keep the negative options
1647  * separate for now.
      *
      * Several names ("barrier", "dax", "auto_da_alloc", "init_itable",
      * "test_dummy_encryption") are listed twice, once as a bare flag and
      * once taking a value. Entries mapped to Opt_removed are still parsed,
      * but ext4_parse_param() only logs that the option is ignored.
      * The table is terminated by an empty entry.
1648  */
1649 static const struct fs_parameter_spec ext4_param_specs[] = {
1650         fsparam_flag    ("bsddf",               Opt_bsd_df),
1651         fsparam_flag    ("minixdf",             Opt_minix_df),
1652         fsparam_flag    ("grpid",               Opt_grpid),
1653         fsparam_flag    ("bsdgroups",           Opt_grpid),
1654         fsparam_flag    ("nogrpid",             Opt_nogrpid),
1655         fsparam_flag    ("sysvgroups",          Opt_nogrpid),
1656         fsparam_u32     ("resgid",              Opt_resgid),
1657         fsparam_u32     ("resuid",              Opt_resuid),
1658         fsparam_u32     ("sb",                  Opt_sb),
1659         fsparam_enum    ("errors",              Opt_errors, ext4_param_errors),
1660         fsparam_flag    ("nouid32",             Opt_nouid32),
1661         fsparam_flag    ("debug",               Opt_debug),
1662         fsparam_flag    ("oldalloc",            Opt_removed),
1663         fsparam_flag    ("orlov",               Opt_removed),
1664         fsparam_flag    ("user_xattr",          Opt_user_xattr),
1665         fsparam_flag    ("acl",                 Opt_acl),
1666         fsparam_flag    ("norecovery",          Opt_noload),
1667         fsparam_flag    ("noload",              Opt_noload),
1668         fsparam_flag    ("bh",                  Opt_removed),
1669         fsparam_flag    ("nobh",                Opt_removed),
1670         fsparam_u32     ("commit",              Opt_commit),
1671         fsparam_u32     ("min_batch_time",      Opt_min_batch_time),
1672         fsparam_u32     ("max_batch_time",      Opt_max_batch_time),
1673         fsparam_u32     ("journal_dev",         Opt_journal_dev),
1674         fsparam_bdev    ("journal_path",        Opt_journal_path),
1675         fsparam_flag    ("journal_checksum",    Opt_journal_checksum),
1676         fsparam_flag    ("nojournal_checksum",  Opt_nojournal_checksum),
1677         fsparam_flag    ("journal_async_commit",Opt_journal_async_commit),
1678         fsparam_flag    ("abort",               Opt_abort),
1679         fsparam_enum    ("data",                Opt_data, ext4_param_data),
1680         fsparam_enum    ("data_err",            Opt_data_err,
1681                                                 ext4_param_data_err),
1682         fsparam_string_empty
1683                         ("usrjquota",           Opt_usrjquota),
1684         fsparam_string_empty
1685                         ("grpjquota",           Opt_grpjquota),
1686         fsparam_enum    ("jqfmt",               Opt_jqfmt, ext4_param_jqfmt),
1687         fsparam_flag    ("grpquota",            Opt_grpquota),
1688         fsparam_flag    ("quota",               Opt_quota),
1689         fsparam_flag    ("noquota",             Opt_noquota),
1690         fsparam_flag    ("usrquota",            Opt_usrquota),
1691         fsparam_flag    ("prjquota",            Opt_prjquota),
1692         fsparam_flag    ("barrier",             Opt_barrier),
1693         fsparam_u32     ("barrier",             Opt_barrier),
1694         fsparam_flag    ("nobarrier",           Opt_nobarrier),
1695         fsparam_flag    ("i_version",           Opt_i_version),
1696         fsparam_flag    ("dax",                 Opt_dax),
1697         fsparam_enum    ("dax",                 Opt_dax_type, ext4_param_dax),
1698         fsparam_u32     ("stripe",              Opt_stripe),
1699         fsparam_flag    ("delalloc",            Opt_delalloc),
1700         fsparam_flag    ("nodelalloc",          Opt_nodelalloc),
1701         fsparam_flag    ("warn_on_error",       Opt_warn_on_error),
1702         fsparam_flag    ("nowarn_on_error",     Opt_nowarn_on_error),
1703         fsparam_u32     ("debug_want_extra_isize",
1704                                                 Opt_debug_want_extra_isize),
1705         fsparam_flag    ("mblk_io_submit",      Opt_removed),
1706         fsparam_flag    ("nomblk_io_submit",    Opt_removed),
1707         fsparam_flag    ("block_validity",      Opt_block_validity),
1708         fsparam_flag    ("noblock_validity",    Opt_noblock_validity),
1709         fsparam_u32     ("inode_readahead_blks",
1710                                                 Opt_inode_readahead_blks),
1711         fsparam_u32     ("journal_ioprio",      Opt_journal_ioprio),
1712         fsparam_u32     ("auto_da_alloc",       Opt_auto_da_alloc),
1713         fsparam_flag    ("auto_da_alloc",       Opt_auto_da_alloc),
1714         fsparam_flag    ("noauto_da_alloc",     Opt_noauto_da_alloc),
1715         fsparam_flag    ("dioread_nolock",      Opt_dioread_nolock),
1716         fsparam_flag    ("nodioread_nolock",    Opt_dioread_lock),
1717         fsparam_flag    ("dioread_lock",        Opt_dioread_lock),
1718         fsparam_flag    ("discard",             Opt_discard),
1719         fsparam_flag    ("nodiscard",           Opt_nodiscard),
1720         fsparam_u32     ("init_itable",         Opt_init_itable),
1721         fsparam_flag    ("init_itable",         Opt_init_itable),
1722         fsparam_flag    ("noinit_itable",       Opt_noinit_itable),
1723 #ifdef CONFIG_EXT4_DEBUG
1724         fsparam_flag    ("fc_debug_force",      Opt_fc_debug_force),
1725         fsparam_u32     ("fc_debug_max_replay", Opt_fc_debug_max_replay),
1726 #endif
1727         fsparam_u32     ("max_dir_size_kb",     Opt_max_dir_size_kb),
1728         fsparam_flag    ("test_dummy_encryption",
1729                                                 Opt_test_dummy_encryption),
1730         fsparam_string  ("test_dummy_encryption",
1731                                                 Opt_test_dummy_encryption),
1732         fsparam_flag    ("inlinecrypt",         Opt_inlinecrypt),
1733         fsparam_flag    ("nombcache",           Opt_nombcache),
1734         fsparam_flag    ("no_mbcache",          Opt_nombcache), /* for backward compatibility */
1735         fsparam_flag    ("prefetch_block_bitmaps",
1736                                                 Opt_removed),
1737         fsparam_flag    ("no_prefetch_block_bitmaps",
1738                                                 Opt_no_prefetch_block_bitmaps),
1739         fsparam_s32     ("mb_optimize_scan",    Opt_mb_optimize_scan),
1740         fsparam_string  ("check",               Opt_removed),   /* mount option from ext2/3 */
1741         fsparam_flag    ("nocheck",             Opt_removed),   /* mount option from ext2/3 */
1742         fsparam_flag    ("reservation",         Opt_removed),   /* mount option from ext2/3 */
1743         fsparam_flag    ("noreservation",       Opt_removed),   /* mount option from ext2/3 */
1744         fsparam_u32     ("journal",             Opt_removed),   /* mount option from ext2/3 */
1745         {}
1746 };
1747
/* Default journal I/O priority: best-effort class (IOPRIO_CLASS_BE), level 3. */
1748 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1749
/*
 * Format string for deprecation warnings. Takes two %s arguments: the
 * option name and the release by which the option will be removed
 * (ext4_parse_param() passes param->key and a version string).
 */
1750 static const char deprecated_msg[] =
1751         "Mount option \"%s\" will be removed by %s\n"
1752         "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
1753
/*
 * Flags for the 'flags' member of struct mount_opts; ext4_parse_param() ORs
 * the matching entry's flags into ctx->opt_flags.
 *
 *  MOPT_SET / MOPT_CLEAR  - the entry's mount_opt bits are to be set/cleared
 *                           (NOTE(review): applied in the part of
 *                           ext4_parse_param() after its switch - confirm)
 *  MOPT_NOSUPPORT         - option is parsed but reported as "not supported"
 *  MOPT_EXPLICIT          - the parser additionally records an
 *                           EXT4_MOUNT2_EXPLICIT_* bit for the option
 *  MOPT_Q / MOPT_QFMT     - quota options; both collapse to MOPT_NOSUPPORT
 *                           when CONFIG_QUOTA is disabled
 *  MOPT_NO_EXT2/_NO_EXT3  - presumably: option is not valid when mounting
 *                           as ext2 / ext3 - TODO confirm against the caller
 *  MOPT_2                 - presumably: mount_opt refers to the second mount
 *                           option word (EXT4_MOUNT2_*) - TODO confirm
 */
1754 #define MOPT_SET        0x0001
1755 #define MOPT_CLEAR      0x0002
1756 #define MOPT_NOSUPPORT  0x0004
1757 #define MOPT_EXPLICIT   0x0008
1758 #ifdef CONFIG_QUOTA
1759 #define MOPT_Q          0
1760 #define MOPT_QFMT       0x0010
1761 #else
1762 #define MOPT_Q          MOPT_NOSUPPORT
1763 #define MOPT_QFMT       MOPT_NOSUPPORT
1764 #endif
1765 #define MOPT_NO_EXT2    0x0020
1766 #define MOPT_NO_EXT3    0x0040
1767 #define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1768 #define MOPT_SKIP       0x0080
1769 #define MOPT_2          0x0100
1770
/*
 * Per-token mount option table. ext4_parse_param() scans it linearly for
 * the entry whose 'token' matches the parsed option and uses that entry's
 * 'mount_opt' bits and MOPT_* 'flags'. The table ends with the Opt_err
 * sentinel (mount_opt 0, flags 0); a token with no entry of its own
 * resolves to that sentinel.
 */
1771 static const struct mount_opts {
1772         int     token;
1773         int     mount_opt;
1774         int     flags;
1775 } ext4_mount_opts[] = {
1776         {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1777         {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1778         {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1779         {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1780         {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1781         {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1782         {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1783          MOPT_EXT4_ONLY | MOPT_SET},
1784         {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1785          MOPT_EXT4_ONLY | MOPT_CLEAR},
1786         {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1787         {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1788         {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1789          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1790         {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1791          MOPT_EXT4_ONLY | MOPT_CLEAR},
1792         {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1793         {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1794         {Opt_commit, 0, MOPT_NO_EXT2},
1795         {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1796          MOPT_EXT4_ONLY | MOPT_CLEAR},
1797         {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1798          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1799         {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1800                                     EXT4_MOUNT_JOURNAL_CHECKSUM),
1801          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1802         {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1803         {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1804         {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1805         {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1806         {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1807         {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1808         {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1809         {Opt_dax_type, 0, MOPT_EXT4_ONLY},
1810         {Opt_journal_dev, 0, MOPT_NO_EXT2},
1811         {Opt_journal_path, 0, MOPT_NO_EXT2},
1812         {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1813         {Opt_data, 0, MOPT_NO_EXT2},
1814         {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1815 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1816         {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1817 #else
1818         {Opt_acl, 0, MOPT_NOSUPPORT},
1819 #endif
1820         {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1821         {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1822         {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1823         {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1824                                                         MOPT_SET | MOPT_Q},
1825         {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1826                                                         MOPT_SET | MOPT_Q},
1827         {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1828                                                         MOPT_SET | MOPT_Q},
1829         {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1830                        EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1831                                                         MOPT_CLEAR | MOPT_Q},
1832         {Opt_usrjquota, 0, MOPT_Q},
1833         {Opt_grpjquota, 0, MOPT_Q},
1834         {Opt_jqfmt, 0, MOPT_QFMT},
1835         {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1836         {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1837          MOPT_SET},
1838 #ifdef CONFIG_EXT4_DEBUG
1839         {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1840          MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1841 #endif
1842         {Opt_err, 0, 0}
1843 };
1844
1845 #if IS_ENABLED(CONFIG_UNICODE)
/*
 * Table of encodings known to ext4. Each entry ties the encoding magic
 * stored in the superblock (compared against es->s_encoding by
 * ext4_sb_read_encoding()) to an encoding name and a unicode version.
 */
1846 static const struct ext4_sb_encodings {
1847         __u16 magic;
1848         char *name;
1849         unsigned int version;
1850 } ext4_sb_encoding_map[] = {
1851         {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1852 };
1853
/*
 * Look up the encoding named by the superblock.
 *
 * Converts es->s_encoding from little-endian and returns the first entry of
 * ext4_sb_encoding_map[] whose magic matches it, or NULL when no entry does.
 */
1854 static const struct ext4_sb_encodings *
1855 ext4_sb_read_encoding(const struct ext4_super_block *es)
1856 {
1857         const __u16 wanted = le16_to_cpu(es->s_encoding);
1858         const struct ext4_sb_encodings *enc = ext4_sb_encoding_map;
1859         size_t left = ARRAY_SIZE(ext4_sb_encoding_map);
1860
1861         for (; left; left--, enc++)
1862                 if (enc->magic == wanted)
1863                         return enc;
1864         return NULL;
1865 }
1866 #endif
1867
/*
 * Bits for ext4_fs_context.spec. The option parser sets a bit when the
 * corresponding value has been explicitly specified, so that later code can
 * tell a given value from the zero-initialised default. Bit 6 is not
 * assigned here.
 */
1868 #define EXT4_SPEC_JQUOTA                        (1 <<  0)
1869 #define EXT4_SPEC_JQFMT                         (1 <<  1)
1870 #define EXT4_SPEC_DATAJ                         (1 <<  2)
1871 #define EXT4_SPEC_SB_BLOCK                      (1 <<  3)
1872 #define EXT4_SPEC_JOURNAL_DEV                   (1 <<  4)
1873 #define EXT4_SPEC_JOURNAL_IOPRIO                (1 <<  5)
1874 #define EXT4_SPEC_s_want_extra_isize            (1 <<  7)
1875 #define EXT4_SPEC_s_max_batch_time              (1 <<  8)
1876 #define EXT4_SPEC_s_min_batch_time              (1 <<  9)
1877 #define EXT4_SPEC_s_inode_readahead_blks        (1 << 10)
1878 #define EXT4_SPEC_s_li_wait_mult                (1 << 11)
1879 #define EXT4_SPEC_s_max_dir_size_kb             (1 << 12)
1880 #define EXT4_SPEC_s_stripe                      (1 << 13)
1881 #define EXT4_SPEC_s_resuid                      (1 << 14)
1882 #define EXT4_SPEC_s_resgid                      (1 << 15)
1883 #define EXT4_SPEC_s_commit_interval             (1 << 16)
1884 #define EXT4_SPEC_s_fc_debug_max_replay         (1 << 17)
1885 #define EXT4_SPEC_s_sb_block                    (1 << 18)
1886 #define EXT4_SPEC_mb_optimize_scan              (1 << 19)
1887
/*
 * ext4's private option-parsing state, stored in fc->fs_private. It is
 * allocated zeroed by ext4_init_fs_context(), filled in by
 * ext4_parse_param(), and released by ext4_fc_free().
 *
 * The vals_s_X / mask_s_X pairs work together: mask_s_X records which bits
 * the options touched and vals_s_X holds the value those bits should take
 * (see the ctx_set_X / ctx_clear_X helpers).
 */
1888 struct ext4_fs_context {
1889         char            *s_qf_names[EXT4_MAXQUOTAS];
1890         struct fscrypt_dummy_policy dummy_enc_policy;
1891         int             s_jquota_fmt;   /* Format of quota to use */
1892 #ifdef CONFIG_EXT4_DEBUG
1893         int s_fc_debug_max_replay;
1894 #endif
1895         unsigned short  qname_spec;     /* (1 << qtype) for each quota name set or cleared */
1896         unsigned long   vals_s_flags;   /* Bits to set in s_flags */
1897         unsigned long   mask_s_flags;   /* Bits changed in s_flags */
1898         unsigned long   journal_devnum;
1899         unsigned long   s_commit_interval;
1900         unsigned long   s_stripe;
1901         unsigned int    s_inode_readahead_blks;
1902         unsigned int    s_want_extra_isize;
1903         unsigned int    s_li_wait_mult;
1904         unsigned int    s_max_dir_size_kb;
1905         unsigned int    journal_ioprio;
1906         unsigned int    vals_s_mount_opt;
1907         unsigned int    mask_s_mount_opt;
1908         unsigned int    vals_s_mount_opt2;
1909         unsigned int    mask_s_mount_opt2;
1910         unsigned long   vals_s_mount_flags;
1911         unsigned long   mask_s_mount_flags;
1912         unsigned int    opt_flags;      /* MOPT flags */
1913         unsigned int    spec;           /* EXT4_SPEC_* bits: which values were given */
1914         u32             s_max_batch_time;
1915         u32             s_min_batch_time;
1916         kuid_t          s_resuid;
1917         kgid_t          s_resgid;
1918         ext4_fsblk_t    s_sb_block;
1919 };
1920
/*
 * Release the ext4 parsing context attached to @fc.
 *
 * Does nothing when no context is attached. Otherwise frees every recorded
 * quota file name, the dummy encryption policy and the context itself.
 */
1921 static void ext4_fc_free(struct fs_context *fc)
1922 {
1923         struct ext4_fs_context *ctx = fc->fs_private;
1924         char **qf_name;
1925
1926         if (!ctx)
1927                 return;
1928
1929         qf_name = ctx->s_qf_names;
1930         while (qf_name < ctx->s_qf_names + EXT4_MAXQUOTAS)
1931                 kfree(*qf_name++);
1932         fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
1933         kfree(ctx);
1934 }
1935
/*
 * Set up @fc for ext4: allocate a zeroed struct ext4_fs_context, attach it
 * as fc->fs_private and install ext4_context_ops.
 *
 * Returns 0 on success or -ENOMEM if the context cannot be allocated, in
 * which case @fc is left untouched.
 */
1936 int ext4_init_fs_context(struct fs_context *fc)
1937 {
1938         struct ext4_fs_context *new_ctx;
1939
1940         new_ctx = kzalloc(sizeof(*new_ctx), GFP_KERNEL);
1941         if (!new_ctx)
1942                 return -ENOMEM;
1943
1944         fc->ops = &ext4_context_ops;
1945         fc->fs_private = new_ctx;
1946
1947         return 0;
1948 }
1949
1950 #ifdef CONFIG_QUOTA
1951 /*
1952  * Note the name of the specified quota file.
      *
      * The name in @param must be non-empty and must not contain '/'. If a
      * name is already recorded for @qtype, repeating the same name succeeds
      * without changes while a different name is rejected. Otherwise the name
      * is duplicated into the context and the type is flagged in qname_spec
      * and EXT4_SPEC_JQUOTA.
      *
      * Returns 0 on success, -EINVAL for a missing, invalid or conflicting
      * name, or -ENOMEM if the copy cannot be allocated.
1953  */
1954 static int note_qf_name(struct fs_context *fc, int qtype,
1955                        struct fs_parameter *param)
1956 {
1957         struct ext4_fs_context *ctx = fc->fs_private;
1958         const char *prev = ctx->s_qf_names[qtype];
1959         char *copy;
1960
1961         if (param->size < 1) {
1962                 ext4_msg(NULL, KERN_ERR, "Missing quota name");
1963                 return -EINVAL;
1964         }
1965         if (strchr(param->string, '/')) {
1966                 ext4_msg(NULL, KERN_ERR,
1967                          "quotafile must be on filesystem root");
1968                 return -EINVAL;
1969         }
1970         if (prev) {
1971                 /* Repeating the same name is fine; changing it is not. */
1972                 if (!strcmp(prev, param->string))
1973                         return 0;
1974                 ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
1975                          QTYPE2NAME(qtype));
1976                 return -EINVAL;
1977         }
1978
1979         copy = kmemdup_nul(param->string, param->size, GFP_KERNEL);
1980         if (!copy) {
1981                 ext4_msg(NULL, KERN_ERR,
1982                          "Not enough memory for storing quotafile name");
1983                 return -ENOMEM;
1984         }
1985         ctx->s_qf_names[qtype] = copy;
1986         ctx->spec |= EXT4_SPEC_JQUOTA;
1987         ctx->qname_spec |= 1 << qtype;
1988         return 0;
1989 }
1990
1991 /*
1992  * Clear the name of the specified quota file.
      *
      * Frees any quota file name recorded in the parsing context for @qtype,
      * resets the slot to NULL and flags the type in qname_spec and
      * EXT4_SPEC_JQUOTA so the change is recorded. Always returns 0.
1993  */
1994 static int unnote_qf_name(struct fs_context *fc, int qtype)
1995 {
1996         struct ext4_fs_context *ctx = fc->fs_private;
1997
1998         /* kfree(NULL) is a no-op, so the slot needs no NULL check. */
1999         kfree(ctx->s_qf_names[qtype]);
2000
2001         ctx->s_qf_names[qtype] = NULL;
2002         ctx->qname_spec |= 1 << qtype;
2003         ctx->spec |= EXT4_SPEC_JQUOTA;
2004         return 0;
2005 }
2006 #endif
2007
/*
 * Handle the "test_dummy_encryption" mount option.
 *
 * Without CONFIG_FS_ENCRYPTION the option is rejected with -EINVAL.
 * Otherwise the parameter is handed to fscrypt_parse_test_dummy_encryption(),
 * which fills ctx->dummy_enc_policy. An -EINVAL result is logged as an
 * unrecognized value, an -EEXIST result is logged as a conflict and turned
 * into -EINVAL; any other result is returned unchanged.
 */
2008 static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2009                                             struct ext4_fs_context *ctx)
2010 {
2011         struct fscrypt_dummy_policy *policy = &ctx->dummy_enc_policy;
2012         int ret;
2013
2014         if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2015                 ext4_msg(NULL, KERN_WARNING,
2016                          "test_dummy_encryption option not supported");
2017                 return -EINVAL;
2018         }
2019         ret = fscrypt_parse_test_dummy_encryption(param, policy);
2020         if (ret == -EEXIST) {
2021                 ext4_msg(NULL, KERN_WARNING,
2022                          "Conflicting test_dummy_encryption options");
2023                 ret = -EINVAL;
2024         } else if (ret == -EINVAL) {
2025                 ext4_msg(NULL, KERN_WARNING,
2026                          "Value of option \"%s\" is unrecognized", param->key);
2027         }
2028         return ret;
2029 }
2030
/*
 * Generate ctx_set_<name>(ctx, flag): record @flag as changed in
 * ctx->mask_s_<name> and set it in ctx->vals_s_<name>.
 */
2031 #define EXT4_SET_CTX(name)                                              \
2032 static inline void ctx_set_##name(struct ext4_fs_context *ctx,          \
2033                                   unsigned long flag)                   \
2034 {                                                                       \
2035         ctx->mask_s_##name |= flag;                                     \
2036         ctx->vals_s_##name |= flag;                                     \
2037 }
2038
/*
 * Generate ctx_clear_<name>(ctx, flag): record @flag as changed in
 * ctx->mask_s_<name> and clear it in ctx->vals_s_<name>.
 */
2039 #define EXT4_CLEAR_CTX(name)                                            \
2040 static inline void ctx_clear_##name(struct ext4_fs_context *ctx,        \
2041                                     unsigned long flag)                 \
2042 {                                                                       \
2043         ctx->mask_s_##name |= flag;                                     \
2044         ctx->vals_s_##name &= ~flag;                                    \
2045 }
2046
/*
 * Generate ctx_test_<name>(ctx, flag): return the bits of @flag that are
 * currently set in ctx->vals_s_<name> (non-zero if any is set). The mask
 * word is not consulted.
 */
2047 #define EXT4_TEST_CTX(name)                                             \
2048 static inline unsigned long                                             \
2049 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)        \
2050 {                                                                       \
2051         return (ctx->vals_s_##name & flag);                             \
2052 }
2053
/*
 * Instantiate the context helpers: ctx_set_flags() for the s_flags pair, and
 * set/clear/test helpers for the mount_opt and mount_opt2 pairs.
 */
2054 EXT4_SET_CTX(flags); /* set only */
2055 EXT4_SET_CTX(mount_opt);
2056 EXT4_CLEAR_CTX(mount_opt);
2057 EXT4_TEST_CTX(mount_opt);
2058 EXT4_SET_CTX(mount_opt2);
2059 EXT4_CLEAR_CTX(mount_opt2);
2060 EXT4_TEST_CTX(mount_opt2);
2061
/*
 * Record mount flag @bit in the parsing context: the bit is set both in
 * mask_s_mount_flags (changed) and in vals_s_mount_flags (new value).
 * Unlike the ctx_set_<name>() helpers, @bit is passed to set_bit(), i.e. it
 * is a bit number rather than a bit mask.
 */
2062 static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
2063 {
2064         set_bit(bit, &ctx->mask_s_mount_flags);
2065         set_bit(bit, &ctx->vals_s_mount_flags);
2066 }
2067
2068 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2069 {
2070         struct ext4_fs_context *ctx = fc->fs_private;
2071         struct fs_parse_result result;
2072         const struct mount_opts *m;
2073         int is_remount;
2074         kuid_t uid;
2075         kgid_t gid;
2076         int token;
2077
2078         token = fs_parse(fc, ext4_param_specs, param, &result);
2079         if (token < 0)
2080                 return token;
2081         is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2082
2083         for (m = ext4_mount_opts; m->token != Opt_err; m++)
2084                 if (token == m->token)
2085                         break;
2086
2087         ctx->opt_flags |= m->flags;
2088
2089         if (m->flags & MOPT_EXPLICIT) {
2090                 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2091                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2092                 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2093                         ctx_set_mount_opt2(ctx,
2094                                        EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2095                 } else
2096                         return -EINVAL;
2097         }
2098
2099         if (m->flags & MOPT_NOSUPPORT) {
2100                 ext4_msg(NULL, KERN_ERR, "%s option not supported",
2101                          param->key);
2102                 return 0;
2103         }
2104
2105         switch (token) {
2106 #ifdef CONFIG_QUOTA
2107         case Opt_usrjquota:
2108                 if (!*param->string)
2109                         return unnote_qf_name(fc, USRQUOTA);
2110                 else
2111                         return note_qf_name(fc, USRQUOTA, param);
2112         case Opt_grpjquota:
2113                 if (!*param->string)
2114                         return unnote_qf_name(fc, GRPQUOTA);
2115                 else
2116                         return note_qf_name(fc, GRPQUOTA, param);
2117 #endif
2118         case Opt_sb:
2119                 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2120                         ext4_msg(NULL, KERN_WARNING,
2121                                  "Ignoring %s option on remount", param->key);
2122                 } else {
2123                         ctx->s_sb_block = result.uint_32;
2124                         ctx->spec |= EXT4_SPEC_s_sb_block;
2125                 }
2126                 return 0;
2127         case Opt_removed:
2128                 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2129                          param->key);
2130                 return 0;
2131         case Opt_abort:
2132                 ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
2133                 return 0;
2134         case Opt_i_version:
2135                 ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20");
2136                 ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n");
2137                 ctx_set_flags(ctx, SB_I_VERSION);
2138                 return 0;
2139         case Opt_inlinecrypt:
2140 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2141                 ctx_set_flags(ctx, SB_INLINECRYPT);
2142 #else
2143                 ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2144 #endif
2145                 return 0;
2146         case Opt_errors:
2147                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2148                 ctx_set_mount_opt(ctx, result.uint_32);
2149                 return 0;
2150 #ifdef CONFIG_QUOTA
2151         case Opt_jqfmt:
2152                 ctx->s_jquota_fmt = result.uint_32;
2153                 ctx->spec |= EXT4_SPEC_JQFMT;
2154                 return 0;
2155 #endif
2156         case Opt_data:
2157                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2158                 ctx_set_mount_opt(ctx, result.uint_32);
2159                 ctx->spec |= EXT4_SPEC_DATAJ;
2160                 return 0;
2161         case Opt_commit:
2162                 if (result.uint_32 == 0)
2163                         ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE;
2164                 else if (result.uint_32 > INT_MAX / HZ) {
2165                         ext4_msg(NULL, KERN_ERR,
2166                                  "Invalid commit interval %d, "
2167                                  "must be smaller than %d",
2168                                  result.uint_32, INT_MAX / HZ);
2169                         return -EINVAL;
2170                 }
2171                 ctx->s_commit_interval = HZ * result.uint_32;
2172                 ctx->spec |= EXT4_SPEC_s_commit_interval;
2173                 return 0;
2174         case Opt_debug_want_extra_isize:
2175                 if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2176                         ext4_msg(NULL, KERN_ERR,
2177                                  "Invalid want_extra_isize %d", result.uint_32);
2178                         return -EINVAL;
2179                 }
2180                 ctx->s_want_extra_isize = result.uint_32;
2181                 ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2182                 return 0;
2183         case Opt_max_batch_time:
2184                 ctx->s_max_batch_time = result.uint_32;
2185                 ctx->spec |= EXT4_SPEC_s_max_batch_time;
2186                 return 0;
2187         case Opt_min_batch_time:
2188                 ctx->s_min_batch_time = result.uint_32;
2189                 ctx->spec |= EXT4_SPEC_s_min_batch_time;
2190                 return 0;
2191         case Opt_inode_readahead_blks:
2192                 if (result.uint_32 &&
2193                     (result.uint_32 > (1 << 30) ||
2194                      !is_power_of_2(result.uint_32))) {
2195                         ext4_msg(NULL, KERN_ERR,
2196                                  "EXT4-fs: inode_readahead_blks must be "
2197                                  "0 or a power of 2 smaller than 2^31");
2198                         return -EINVAL;
2199                 }
2200                 ctx->s_inode_readahead_blks = result.uint_32;
2201                 ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2202                 return 0;
2203         case Opt_init_itable:
2204                 ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2205                 ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2206                 if (param->type == fs_value_is_string)
2207                         ctx->s_li_wait_mult = result.uint_32;
2208                 ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2209                 return 0;
2210         case Opt_max_dir_size_kb:
2211                 ctx->s_max_dir_size_kb = result.uint_32;
2212                 ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2213                 return 0;
2214 #ifdef CONFIG_EXT4_DEBUG
2215         case Opt_fc_debug_max_replay:
2216                 ctx->s_fc_debug_max_replay = result.uint_32;
2217                 ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2218                 return 0;
2219 #endif
2220         case Opt_stripe:
2221                 ctx->s_stripe = result.uint_32;
2222                 ctx->spec |= EXT4_SPEC_s_stripe;
2223                 return 0;
2224         case Opt_resuid:
2225                 uid = make_kuid(current_user_ns(), result.uint_32);
2226                 if (!uid_valid(uid)) {
2227                         ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
2228                                  result.uint_32);
2229                         return -EINVAL;
2230                 }
2231                 ctx->s_resuid = uid;
2232                 ctx->spec |= EXT4_SPEC_s_resuid;
2233                 return 0;
2234         case Opt_resgid:
2235                 gid = make_kgid(current_user_ns(), result.uint_32);
2236                 if (!gid_valid(gid)) {
2237                         ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
2238                                  result.uint_32);
2239                         return -EINVAL;
2240                 }
2241                 ctx->s_resgid = gid;
2242                 ctx->spec |= EXT4_SPEC_s_resgid;
2243                 return 0;
2244         case Opt_journal_dev:
2245                 if (is_remount) {
2246                         ext4_msg(NULL, KERN_ERR,
2247                                  "Cannot specify journal on remount");
2248                         return -EINVAL;
2249                 }
2250                 ctx->journal_devnum = result.uint_32;
2251                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2252                 return 0;
2253         case Opt_journal_path:
2254         {
2255                 struct inode *journal_inode;
2256                 struct path path;
2257                 int error;
2258
2259                 if (is_remount) {
2260                         ext4_msg(NULL, KERN_ERR,
2261                                  "Cannot specify journal on remount");
2262                         return -EINVAL;
2263                 }
2264
2265                 error = fs_lookup_param(fc, param, 1, &path);
2266                 if (error) {
2267                         ext4_msg(NULL, KERN_ERR, "error: could not find "
2268                                  "journal device path");
2269                         return -EINVAL;
2270                 }
2271
2272                 journal_inode = d_inode(path.dentry);
2273                 ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2274                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2275                 path_put(&path);
2276                 return 0;
2277         }
2278         case Opt_journal_ioprio:
2279                 if (result.uint_32 > 7) {
2280                         ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2281                                  " (must be 0-7)");
2282                         return -EINVAL;
2283                 }
2284                 ctx->journal_ioprio =
2285                         IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2286                 ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2287                 return 0;
2288         case Opt_test_dummy_encryption:
2289                 return ext4_parse_test_dummy_encryption(param, ctx);
2290         case Opt_dax:
2291         case Opt_dax_type:
2292 #ifdef CONFIG_FS_DAX
2293         {
2294                 int type = (token == Opt_dax) ?
2295                            Opt_dax : result.uint_32;
2296
2297                 switch (type) {
2298                 case Opt_dax:
2299                 case Opt_dax_always:
2300                         ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2301                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2302                         break;
2303                 case Opt_dax_never:
2304                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2305                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2306                         break;
2307                 case Opt_dax_inode:
2308                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2309                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2310                         /* Strictly for printing options */
2311                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2312                         break;
2313                 }
2314                 return 0;
2315         }
2316 #else
2317                 ext4_msg(NULL, KERN_INFO, "dax option not supported");
2318                 return -EINVAL;
2319 #endif
2320         case Opt_data_err:
2321                 if (result.uint_32 == Opt_data_err_abort)
2322                         ctx_set_mount_opt(ctx, m->mount_opt);
2323                 else if (result.uint_32 == Opt_data_err_ignore)
2324                         ctx_clear_mount_opt(ctx, m->mount_opt);
2325                 return 0;
2326         case Opt_mb_optimize_scan:
2327                 if (result.int_32 == 1) {
2328                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2329                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2330                 } else if (result.int_32 == 0) {
2331                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2332                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2333                 } else {
2334                         ext4_msg(NULL, KERN_WARNING,
2335                                  "mb_optimize_scan should be set to 0 or 1.");
2336                         return -EINVAL;
2337                 }
2338                 return 0;
2339         }
2340
2341         /*
2342          * At this point we should only be getting options requiring MOPT_SET,
2343          * or MOPT_CLEAR. Anything else is a bug
2344          */
2345         if (m->token == Opt_err) {
2346                 ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2347                          param->key);
2348                 WARN_ON(1);
2349                 return -EINVAL;
2350         }
2351
2352         else {
2353                 unsigned int set = 0;
2354
2355                 if ((param->type == fs_value_is_flag) ||
2356                     result.uint_32 > 0)
2357                         set = 1;
2358
2359                 if (m->flags & MOPT_CLEAR)
2360                         set = !set;
2361                 else if (unlikely(!(m->flags & MOPT_SET))) {
2362                         ext4_msg(NULL, KERN_WARNING,
2363                                  "buggy handling of option %s",
2364                                  param->key);
2365                         WARN_ON(1);
2366                         return -EINVAL;
2367                 }
2368                 if (m->flags & MOPT_2) {
2369                         if (set != 0)
2370                                 ctx_set_mount_opt2(ctx, m->mount_opt);
2371                         else
2372                                 ctx_clear_mount_opt2(ctx, m->mount_opt);
2373                 } else {
2374                         if (set != 0)
2375                                 ctx_set_mount_opt(ctx, m->mount_opt);
2376                         else
2377                                 ctx_clear_mount_opt(ctx, m->mount_opt);
2378                 }
2379         }
2380
2381         return 0;
2382 }
2383
2384 static int parse_options(struct fs_context *fc, char *options)
2385 {
2386         struct fs_parameter param;
2387         int ret;
2388         char *key;
2389
2390         if (!options)
2391                 return 0;
2392
2393         while ((key = strsep(&options, ",")) != NULL) {
2394                 if (*key) {
2395                         size_t v_len = 0;
2396                         char *value = strchr(key, '=');
2397
2398                         param.type = fs_value_is_flag;
2399                         param.string = NULL;
2400
2401                         if (value) {
2402                                 if (value == key)
2403                                         continue;
2404
2405                                 *value++ = 0;
2406                                 v_len = strlen(value);
2407                                 param.string = kmemdup_nul(value, v_len,
2408                                                            GFP_KERNEL);
2409                                 if (!param.string)
2410                                         return -ENOMEM;
2411                                 param.type = fs_value_is_string;
2412                         }
2413
2414                         param.key = key;
2415                         param.size = v_len;
2416
2417                         ret = ext4_parse_param(fc, &param);
2418                         if (param.string)
2419                                 kfree(param.string);
2420                         if (ret < 0)
2421                                 return ret;
2422                 }
2423         }
2424
2425         ret = ext4_validate_options(fc);
2426         if (ret < 0)
2427                 return ret;
2428
2429         return 0;
2430 }
2431
2432 static int parse_apply_sb_mount_options(struct super_block *sb,
2433                                         struct ext4_fs_context *m_ctx)
2434 {
2435         struct ext4_sb_info *sbi = EXT4_SB(sb);
2436         char *s_mount_opts = NULL;
2437         struct ext4_fs_context *s_ctx = NULL;
2438         struct fs_context *fc = NULL;
2439         int ret = -ENOMEM;
2440
2441         if (!sbi->s_es->s_mount_opts[0])
2442                 return 0;
2443
2444         s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2445                                 sizeof(sbi->s_es->s_mount_opts),
2446                                 GFP_KERNEL);
2447         if (!s_mount_opts)
2448                 return ret;
2449
2450         fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2451         if (!fc)
2452                 goto out_free;
2453
2454         s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2455         if (!s_ctx)
2456                 goto out_free;
2457
2458         fc->fs_private = s_ctx;
2459         fc->s_fs_info = sbi;
2460
2461         ret = parse_options(fc, s_mount_opts);
2462         if (ret < 0)
2463                 goto parse_failed;
2464
2465         ret = ext4_check_opt_consistency(fc, sb);
2466         if (ret < 0) {
2467 parse_failed:
2468                 ext4_msg(sb, KERN_WARNING,
2469                          "failed to parse options in superblock: %s",
2470                          s_mount_opts);
2471                 ret = 0;
2472                 goto out_free;
2473         }
2474
2475         if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2476                 m_ctx->journal_devnum = s_ctx->journal_devnum;
2477         if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2478                 m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2479
2480         ext4_apply_options(fc, sb);
2481         ret = 0;
2482
2483 out_free:
2484         if (fc) {
2485                 ext4_fc_free(fc);
2486                 kfree(fc);
2487         }
2488         kfree(s_mount_opts);
2489         return ret;
2490 }
2491
/*
 * Move the journaled quota settings collected in the parse context of @fc
 * onto the ext4_sb_info of @sb.  Nothing is done when the filesystem has
 * the quota feature, or when the kernel is built without CONFIG_QUOTA.
 */
static void ext4_apply_quota_options(struct fs_context *fc,
				     struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	bool has_quota_feature = ext4_has_feature_quota(sb);
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *new_name, *old_name;
	int type;

	if (has_quota_feature)
		return;

	if (ctx->spec & EXT4_SPEC_JQFMT)
		sbi->s_jquota_fmt = ctx->s_jquota_fmt;

	if (!(ctx->spec & EXT4_SPEC_JQUOTA))
		return;

	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
		/* Only touch quota types that were named on this mount */
		if (!(ctx->qname_spec & (1 << type)))
			continue;

		/* Take the name out of the context; it may be NULL */
		new_name = ctx->s_qf_names[type];
		ctx->s_qf_names[type] = NULL;
		if (new_name)
			set_opt(sb, QUOTA);

		/* Publish the new name and retire the previous one via RCU */
		old_name = rcu_replace_pointer(sbi->s_qf_names[type], new_name,
					lockdep_is_held(&sb->s_umount));
		if (old_name)
			kfree_rcu(old_name);
	}
#endif
}
2525
/*
 * Check that the quota options gathered in @fc are consistent with each
 * other and with the current state of @sb.
 *
 * Returns 0 if they are acceptable (always, without CONFIG_QUOTA) and
 * -EINVAL, after logging the reason, otherwise.
 */
static int ext4_check_quota_consistency(struct fs_context *fc,
					struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool has_quota_feature = ext4_has_feature_quota(sb);
	bool quota_active = sb_any_quota_loaded(sb);
	bool have_usr_file, have_grp_file, want_usrquota, want_grpquota;
	int quota_opts, type;

	/*
	 * Only project quota is gated on its feature flag here.  The
	 * 'usrquota' and 'grpquota' options stay usable without the quota
	 * feature so that legacy quota files keep working.
	 */
	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
	    !ext4_has_feature_project(sb)) {
		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
			 "Cannot enable project quota enforcement.");
		return -EINVAL;
	}

	quota_opts = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		     EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
	if (quota_active && (ctx->mask_s_mount_opt & quota_opts) &&
	    !ctx_test_mount_opt(ctx, quota_opts))
		goto err_quota_change;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (type = 0; type < EXT4_MAXQUOTAS; type++) {
			if (!(ctx->qname_spec & (1 << type)))
				continue;

			/* A name may not appear or vanish while quota is on */
			if (quota_active &&
			    !!sbi->s_qf_names[type] != !!ctx->s_qf_names[type])
				goto err_jquota_change;

			/* An already configured name may not be replaced */
			if (sbi->s_qf_names[type] && ctx->s_qf_names[type] &&
			    strcmp(get_qf_name(sb, sbi, type),
				   ctx->s_qf_names[type]) != 0)
				goto err_jquota_specified;
		}

		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO,
				 "Journaled quota options ignored when "
				 "QUOTA feature is enabled");
			return 0;
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT) {
		if (quota_active && sbi->s_jquota_fmt != ctx->s_jquota_fmt)
			goto err_jquota_change;
		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
				 "ignored when QUOTA feature is enabled");
			return 0;
		}
	}

	/* Make sure we don't mix old and new quota format */
	have_usr_file = (get_qf_name(sb, sbi, USRQUOTA) ||
			 ctx->s_qf_names[USRQUOTA]);
	have_grp_file = (get_qf_name(sb, sbi, GRPQUOTA) ||
			 ctx->s_qf_names[GRPQUOTA]);

	/* A named quota file overrides the matching plain quota option */
	if (have_usr_file) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
		want_usrquota = false;
	} else {
		want_usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
				 test_opt(sb, USRQUOTA));
	}

	if (have_grp_file) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
		want_grpquota = false;
	} else {
		want_grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
				 test_opt(sb, GRPQUOTA));
	}

	if (!have_usr_file && !have_grp_file)
		return 0;

	if (want_usrquota || want_grpquota) {
		ext4_msg(NULL, KERN_ERR, "old and new quota "
			 "format mixing");
		return -EINVAL;
	}

	if (!(ctx->spec & EXT4_SPEC_JQFMT) && !sbi->s_jquota_fmt) {
		ext4_msg(NULL, KERN_ERR, "journaled quota format "
			 "not specified");
		return -EINVAL;
	}

	return 0;

err_quota_change:
	ext4_msg(NULL, KERN_ERR,
		 "Cannot change quota options when quota turned on");
	return -EINVAL;
err_jquota_change:
	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
		 "options when quota turned on");
	return -EINVAL;
err_jquota_specified:
	ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
		 QTYPE2NAME(type));
	return -EINVAL;
#else
	return 0;
#endif
}
2646
2647 static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2648                                             struct super_block *sb)
2649 {
2650         const struct ext4_fs_context *ctx = fc->fs_private;
2651         const struct ext4_sb_info *sbi = EXT4_SB(sb);
2652         int err;
2653
2654         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2655                 return 0;
2656
2657         if (!ext4_has_feature_encrypt(sb)) {
2658                 ext4_msg(NULL, KERN_WARNING,
2659                          "test_dummy_encryption requires encrypt feature");
2660                 return -EINVAL;
2661         }
2662         /*
2663          * This mount option is just for testing, and it's not worthwhile to
2664          * implement the extra complexity (e.g. RCU protection) that would be
2665          * needed to allow it to be set or changed during remount.  We do allow
2666          * it to be specified during remount, but only if there is no change.
2667          */
2668         if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2669                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2670                                                  &ctx->dummy_enc_policy))
2671                         return 0;
2672                 ext4_msg(NULL, KERN_WARNING,
2673                          "Can't set or change test_dummy_encryption on remount");
2674                 return -EINVAL;
2675         }
2676         /* Also make sure s_mount_opts didn't contain a conflicting value. */
2677         if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2678                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2679                                                  &ctx->dummy_enc_policy))
2680                         return 0;
2681                 ext4_msg(NULL, KERN_WARNING,
2682                          "Conflicting test_dummy_encryption options");
2683                 return -EINVAL;
2684         }
2685         /*
2686          * fscrypt_add_test_dummy_key() technically changes the super_block, so
2687          * technically it should be delayed until ext4_apply_options() like the
2688          * other changes.  But since we never get here for remounts (see above),
2689          * and this is the last chance to report errors, we do it here.
2690          */
2691         err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy);
2692         if (err)
2693                 ext4_msg(NULL, KERN_WARNING,
2694                          "Error adding test dummy encryption key [%d]", err);
2695         return err;
2696 }
2697
2698 static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2699                                              struct super_block *sb)
2700 {
2701         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2702             /* if already set, it was already verified to be the same */
2703             fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2704                 return;
2705         EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2706         memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2707         ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2708 }
2709
2710 static int ext4_check_opt_consistency(struct fs_context *fc,
2711                                       struct super_block *sb)
2712 {
2713         struct ext4_fs_context *ctx = fc->fs_private;
2714         struct ext4_sb_info *sbi = fc->s_fs_info;
2715         int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2716         int err;
2717
2718         if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2719                 ext4_msg(NULL, KERN_ERR,
2720                          "Mount option(s) incompatible with ext2");
2721                 return -EINVAL;
2722         }
2723         if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2724                 ext4_msg(NULL, KERN_ERR,
2725                          "Mount option(s) incompatible with ext3");
2726                 return -EINVAL;
2727         }
2728
2729         if (ctx->s_want_extra_isize >
2730             (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2731                 ext4_msg(NULL, KERN_ERR,
2732                          "Invalid want_extra_isize %d",
2733                          ctx->s_want_extra_isize);
2734                 return -EINVAL;
2735         }
2736
2737         if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2738                 int blocksize =
2739                         BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2740                 if (blocksize < PAGE_SIZE)
2741                         ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2742                                  "experimental mount option 'dioread_nolock' "
2743                                  "for blocksize < PAGE_SIZE");
2744         }
2745
2746         err = ext4_check_test_dummy_encryption(fc, sb);
2747         if (err)
2748                 return err;
2749
2750         if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2751                 if (!sbi->s_journal) {
2752                         ext4_msg(NULL, KERN_WARNING,
2753                                  "Remounting file system with no journal "
2754                                  "so ignoring journalled data option");
2755                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2756                 } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2757                            test_opt(sb, DATA_FLAGS)) {
2758                         ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2759                                  "on remount");
2760                         return -EINVAL;
2761                 }
2762         }
2763
2764         if (is_remount) {
2765                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2766                     (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2767                         ext4_msg(NULL, KERN_ERR, "can't mount with "
2768                                  "both data=journal and dax");
2769                         return -EINVAL;
2770                 }
2771
2772                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2773                     (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2774                      (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2775 fail_dax_change_remount:
2776                         ext4_msg(NULL, KERN_ERR, "can't change "
2777                                  "dax mount option while remounting");
2778                         return -EINVAL;
2779                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2780                          (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2781                           (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2782                         goto fail_dax_change_remount;
2783                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2784                            ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2785                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2786                             !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2787                         goto fail_dax_change_remount;
2788                 }
2789         }
2790
2791         return ext4_check_quota_consistency(fc, sb);
2792 }
2793
2794 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2795 {
2796         struct ext4_fs_context *ctx = fc->fs_private;
2797         struct ext4_sb_info *sbi = fc->s_fs_info;
2798
2799         sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2800         sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2801         sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2802         sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2803         sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
2804         sbi->s_mount_flags |= ctx->vals_s_mount_flags;
2805         sb->s_flags &= ~ctx->mask_s_flags;
2806         sb->s_flags |= ctx->vals_s_flags;
2807
2808         /*
2809          * i_version differs from common mount option iversion so we have
2810          * to let vfs know that it was set, otherwise it would get cleared
2811          * on remount
2812          */
2813         if (ctx->mask_s_flags & SB_I_VERSION)
2814                 fc->sb_flags |= SB_I_VERSION;
2815
2816 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2817         APPLY(s_commit_interval);
2818         APPLY(s_stripe);
2819         APPLY(s_max_batch_time);
2820         APPLY(s_min_batch_time);
2821         APPLY(s_want_extra_isize);
2822         APPLY(s_inode_readahead_blks);
2823         APPLY(s_max_dir_size_kb);
2824         APPLY(s_li_wait_mult);
2825         APPLY(s_resgid);
2826         APPLY(s_resuid);
2827
2828 #ifdef CONFIG_EXT4_DEBUG
2829         APPLY(s_fc_debug_max_replay);
2830 #endif
2831
2832         ext4_apply_quota_options(fc, sb);
2833         ext4_apply_test_dummy_encryption(ctx, sb);
2834 }
2835
2836
/*
 * Check the parsed options in @fc for old-style and journaled quota being
 * mixed.
 *
 * If a journaled quota file is named for a quota type, a plain
 * usrquota/grpquota option for that same type is cleared.  If a plain quota
 * option is still set afterwards, the formats are mixed.
 *
 * Returns 1 when the options are acceptable (always, without CONFIG_QUOTA)
 * and -EINVAL when the quota formats are mixed.
 */
static int ext4_validate_options(struct fs_context *fc)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	char *usr_file = ctx->s_qf_names[USRQUOTA];
	char *grp_file = ctx->s_qf_names[GRPQUOTA];

	if (usr_file || grp_file) {
		if (usr_file && ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA))
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);

		if (grp_file && ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA))
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);

		/* Anything still set belongs to a type without a named file */
		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
		    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
			ext4_msg(NULL, KERN_ERR, "old and new quota "
				 "format mixing");
			return -EINVAL;
		}
	}
#endif
	return 1;
}
2863
/*
 * Append the journaled quota options of @sb (format and quota file names)
 * to @seq.  Emits nothing without CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_file, *grp_file;

	if (sbi->s_jquota_fmt) {
		/* Formats other than the three below print an empty name */
		char *fmt = "";

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmt = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmt = "vfsv1";

		seq_printf(seq, ",jqfmt=%s", fmt);
	}

	/* The quota file names are read under the RCU read lock */
	rcu_read_lock();
	usr_file = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_file = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_file)
		seq_show_option(seq, "usrjquota", usr_file);
	if (grp_file)
		seq_show_option(seq, "grpjquota", grp_file);
	rcu_read_unlock();
#endif
}
2898
2899 static const char *token2str(int token)
2900 {
2901         const struct fs_parameter_spec *spec;
2902
2903         for (spec = ext4_param_specs; spec->name != NULL; spec++)
2904                 if (spec->opt == token && !spec->type)
2905                         break;
2906         return spec->name;
2907 }
2908
2909 /*
2910  * Show an option if
2911  *  - it's set to a non-default value OR
2912  *  - if the per-sb default is different from the global default
2913  */
2914 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2915                               int nodefs)
2916 {
2917         struct ext4_sb_info *sbi = EXT4_SB(sb);
2918         struct ext4_super_block *es = sbi->s_es;
2919         int def_errors, def_mount_opt = sbi->s_def_mount_opt;
2920         const struct mount_opts *m;
2921         char sep = nodefs ? '\n' : ',';
2922
2923 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2924 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2925
2926         if (sbi->s_sb_block != 1)
2927                 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2928
2929         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2930                 int want_set = m->flags & MOPT_SET;
2931                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2932                     m->flags & MOPT_SKIP)
2933                         continue;
2934                 if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
2935                         continue; /* skip if same as the default */
2936                 if ((want_set &&
2937                      (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
2938                     (!want_set && (sbi->s_mount_opt & m->mount_opt)))
2939                         continue; /* select Opt_noFoo vs Opt_Foo */
2940                 SEQ_OPTS_PRINT("%s", token2str(m->token));
2941         }
2942
2943         if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2944             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2945                 SEQ_OPTS_PRINT("resuid=%u",
2946                                 from_kuid_munged(&init_user_ns, sbi->s_resuid));
2947         if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2948             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2949                 SEQ_OPTS_PRINT("resgid=%u",
2950                                 from_kgid_munged(&init_user_ns, sbi->s_resgid));
2951         def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2952         if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2953                 SEQ_OPTS_PUTS("errors=remount-ro");
2954         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2955                 SEQ_OPTS_PUTS("errors=continue");
2956         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2957                 SEQ_OPTS_PUTS("errors=panic");
2958         if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2959                 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2960         if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2961                 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2962         if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2963                 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2964         if (sb->s_flags & SB_I_VERSION)
2965                 SEQ_OPTS_PUTS("i_version");
2966         if (nodefs || sbi->s_stripe)
2967                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2968         if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2969                         (sbi->s_mount_opt ^ def_mount_opt)) {
2970                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2971                         SEQ_OPTS_PUTS("data=journal");
2972                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2973                         SEQ_OPTS_PUTS("data=ordered");
2974                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2975                         SEQ_OPTS_PUTS("data=writeback");
2976         }
2977         if (nodefs ||
2978             sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2979                 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2980                                sbi->s_inode_readahead_blks);
2981
2982         if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
2983                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2984                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2985         if (nodefs || sbi->s_max_dir_size_kb)
2986                 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2987         if (test_opt(sb, DATA_ERR_ABORT))
2988                 SEQ_OPTS_PUTS("data_err=abort");
2989
2990         fscrypt_show_test_dummy_encryption(seq, sep, sb);
2991
2992         if (sb->s_flags & SB_INLINECRYPT)
2993                 SEQ_OPTS_PUTS("inlinecrypt");
2994
2995         if (test_opt(sb, DAX_ALWAYS)) {
2996                 if (IS_EXT2_SB(sb))
2997                         SEQ_OPTS_PUTS("dax");
2998                 else
2999                         SEQ_OPTS_PUTS("dax=always");
3000         } else if (test_opt2(sb, DAX_NEVER)) {
3001                 SEQ_OPTS_PUTS("dax=never");
3002         } else if (test_opt2(sb, DAX_INODE)) {
3003                 SEQ_OPTS_PUTS("dax=inode");
3004         }
3005
3006         if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3007                         !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3008                 SEQ_OPTS_PUTS("mb_optimize_scan=0");
3009         } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3010                         test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3011                 SEQ_OPTS_PUTS("mb_optimize_scan=1");
3012         }
3013
3014         ext4_show_quota_options(seq, sb);
3015         return 0;
3016 }
3017
3018 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3019 {
3020         return _ext4_show_options(seq, root->d_sb, 0);
3021 }
3022
3023 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3024 {
3025         struct super_block *sb = seq->private;
3026         int rc;
3027
3028         seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3029         rc = _ext4_show_options(seq, sb, 1);
3030         seq_puts(seq, "\n");
3031         return rc;
3032 }
3033
3034 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3035                             int read_only)
3036 {
3037         struct ext4_sb_info *sbi = EXT4_SB(sb);
3038         int err = 0;
3039
3040         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3041                 ext4_msg(sb, KERN_ERR, "revision level too high, "
3042                          "forcing read-only mode");
3043                 err = -EROFS;
3044                 goto done;
3045         }
3046         if (read_only)
3047                 goto done;
3048         if (!(sbi->s_mount_state & EXT4_VALID_FS))
3049                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3050                          "running e2fsck is recommended");
3051         else if (sbi->s_mount_state & EXT4_ERROR_FS)
3052                 ext4_msg(sb, KERN_WARNING,
3053                          "warning: mounting fs with errors, "
3054                          "running e2fsck is recommended");
3055         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3056                  le16_to_cpu(es->s_mnt_count) >=
3057                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3058                 ext4_msg(sb, KERN_WARNING,
3059                          "warning: maximal mount count reached, "
3060                          "running e2fsck is recommended");
3061         else if (le32_to_cpu(es->s_checkinterval) &&
3062                  (ext4_get_tstamp(es, s_lastcheck) +
3063                   le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3064                 ext4_msg(sb, KERN_WARNING,
3065                          "warning: checktime reached, "
3066                          "running e2fsck is recommended");
3067         if (!sbi->s_journal)
3068                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3069         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3070                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3071         le16_add_cpu(&es->s_mnt_count, 1);
3072         ext4_update_tstamp(es, s_mtime);
3073         if (sbi->s_journal) {
3074                 ext4_set_feature_journal_needs_recovery(sb);
3075                 if (ext4_has_feature_orphan_file(sb))
3076                         ext4_set_feature_orphan_present(sb);
3077         }
3078
3079         err = ext4_commit_super(sb);
3080 done:
3081         if (test_opt(sb, DEBUG))
3082                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3083                                 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3084                         sb->s_blocksize,
3085                         sbi->s_groups_count,
3086                         EXT4_BLOCKS_PER_GROUP(sb),
3087                         EXT4_INODES_PER_GROUP(sb),
3088                         sbi->s_mount_opt, sbi->s_mount_opt2);
3089         return err;
3090 }
3091
3092 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3093 {
3094         struct ext4_sb_info *sbi = EXT4_SB(sb);
3095         struct flex_groups **old_groups, **new_groups;
3096         int size, i, j;
3097
3098         if (!sbi->s_log_groups_per_flex)
3099                 return 0;
3100
3101         size = ext4_flex_group(sbi, ngroup - 1) + 1;
3102         if (size <= sbi->s_flex_groups_allocated)
3103                 return 0;
3104
3105         new_groups = kvzalloc(roundup_pow_of_two(size *
3106                               sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3107         if (!new_groups) {
3108                 ext4_msg(sb, KERN_ERR,
3109                          "not enough memory for %d flex group pointers", size);
3110                 return -ENOMEM;
3111         }
3112         for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3113                 new_groups[i] = kvzalloc(roundup_pow_of_two(
3114                                          sizeof(struct flex_groups)),
3115                                          GFP_KERNEL);
3116                 if (!new_groups[i]) {
3117                         for (j = sbi->s_flex_groups_allocated; j < i; j++)
3118                                 kvfree(new_groups[j]);
3119                         kvfree(new_groups);
3120                         ext4_msg(sb, KERN_ERR,
3121                                  "not enough memory for %d flex groups", size);
3122                         return -ENOMEM;
3123                 }
3124         }
3125         rcu_read_lock();
3126         old_groups = rcu_dereference(sbi->s_flex_groups);
3127         if (old_groups)
3128                 memcpy(new_groups, old_groups,
3129                        (sbi->s_flex_groups_allocated *
3130                         sizeof(struct flex_groups *)));
3131         rcu_read_unlock();
3132         rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3133         sbi->s_flex_groups_allocated = size;
3134         if (old_groups)
3135                 ext4_kvfree_array_rcu(old_groups);
3136         return 0;
3137 }
3138
3139 static int ext4_fill_flex_info(struct super_block *sb)
3140 {
3141         struct ext4_sb_info *sbi = EXT4_SB(sb);
3142         struct ext4_group_desc *gdp = NULL;
3143         struct flex_groups *fg;
3144         ext4_group_t flex_group;
3145         int i, err;
3146
3147         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3148         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3149                 sbi->s_log_groups_per_flex = 0;
3150                 return 1;
3151         }
3152
3153         err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3154         if (err)
3155                 goto failed;
3156
3157         for (i = 0; i < sbi->s_groups_count; i++) {
3158                 gdp = ext4_get_group_desc(sb, i, NULL);
3159
3160                 flex_group = ext4_flex_group(sbi, i);
3161                 fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3162                 atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3163                 atomic64_add(ext4_free_group_clusters(sb, gdp),
3164                              &fg->free_clusters);
3165                 atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3166         }
3167
3168         return 1;
3169 failed:
3170         return 0;
3171 }
3172
3173 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3174                                    struct ext4_group_desc *gdp)
3175 {
3176         int offset = offsetof(struct ext4_group_desc, bg_checksum);
3177         __u16 crc = 0;
3178         __le32 le_group = cpu_to_le32(block_group);
3179         struct ext4_sb_info *sbi = EXT4_SB(sb);
3180
3181         if (ext4_has_metadata_csum(sbi->s_sb)) {
3182                 /* Use new metadata_csum algorithm */
3183                 __u32 csum32;
3184                 __u16 dummy_csum = 0;
3185
3186                 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3187                                      sizeof(le_group));
3188                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3189                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3190                                      sizeof(dummy_csum));
3191                 offset += sizeof(dummy_csum);
3192                 if (offset < sbi->s_desc_size)
3193                         csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3194                                              sbi->s_desc_size - offset);
3195
3196                 crc = csum32 & 0xFFFF;
3197                 goto out;
3198         }
3199
3200         /* old crc16 code */
3201         if (!ext4_has_feature_gdt_csum(sb))
3202                 return 0;
3203
3204         crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3205         crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3206         crc = crc16(crc, (__u8 *)gdp, offset);
3207         offset += sizeof(gdp->bg_checksum); /* skip checksum */
3208         /* for checksum of struct ext4_group_desc do the rest...*/
3209         if (ext4_has_feature_64bit(sb) &&
3210             offset < le16_to_cpu(sbi->s_es->s_desc_size))
3211                 crc = crc16(crc, (__u8 *)gdp + offset,
3212                             le16_to_cpu(sbi->s_es->s_desc_size) -
3213                                 offset);
3214
3215 out:
3216         return cpu_to_le16(crc);
3217 }
3218
3219 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3220                                 struct ext4_group_desc *gdp)
3221 {
3222         if (ext4_has_group_desc_csum(sb) &&
3223             (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3224                 return 0;
3225
3226         return 1;
3227 }
3228
3229 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3230                               struct ext4_group_desc *gdp)
3231 {
3232         if (!ext4_has_group_desc_csum(sb))
3233                 return;
3234         gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3235 }
3236
3237 /* Called at mount-time, super-block is locked */
3238 static int ext4_check_descriptors(struct super_block *sb,
3239                                   ext4_fsblk_t sb_block,
3240                                   ext4_group_t *first_not_zeroed)
3241 {
3242         struct ext4_sb_info *sbi = EXT4_SB(sb);
3243         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3244         ext4_fsblk_t last_block;
3245         ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3246         ext4_fsblk_t block_bitmap;
3247         ext4_fsblk_t inode_bitmap;
3248         ext4_fsblk_t inode_table;
3249         int flexbg_flag = 0;
3250         ext4_group_t i, grp = sbi->s_groups_count;
3251
3252         if (ext4_has_feature_flex_bg(sb))
3253                 flexbg_flag = 1;
3254
3255         ext4_debug("Checking group descriptors");
3256
3257         for (i = 0; i < sbi->s_groups_count; i++) {
3258                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3259
3260                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
3261                         last_block = ext4_blocks_count(sbi->s_es) - 1;
3262                 else
3263                         last_block = first_block +
3264                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
3265
3266                 if ((grp == sbi->s_groups_count) &&
3267                    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3268                         grp = i;
3269
3270                 block_bitmap = ext4_block_bitmap(sb, gdp);
3271                 if (block_bitmap == sb_block) {
3272                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3273                                  "Block bitmap for group %u overlaps "
3274                                  "superblock", i);
3275                         if (!sb_rdonly(sb))
3276                                 return 0;
3277                 }
3278                 if (block_bitmap >= sb_block + 1 &&
3279                     block_bitmap <= last_bg_block) {
3280                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3281                                  "Block bitmap for group %u overlaps "
3282                                  "block group descriptors", i);
3283                         if (!sb_rdonly(sb))
3284                                 return 0;
3285                 }
3286                 if (block_bitmap < first_block || block_bitmap > last_block) {
3287                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3288                                "Block bitmap for group %u not in group "
3289                                "(block %llu)!", i, block_bitmap);
3290                         return 0;
3291                 }
3292                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
3293                 if (inode_bitmap == sb_block) {
3294                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3295                                  "Inode bitmap for group %u overlaps "
3296                                  "superblock", i);
3297                         if (!sb_rdonly(sb))
3298                                 return 0;
3299                 }
3300                 if (inode_bitmap >= sb_block + 1 &&
3301                     inode_bitmap <= last_bg_block) {
3302                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3303                                  "Inode bitmap for group %u overlaps "
3304                                  "block group descriptors", i);
3305                         if (!sb_rdonly(sb))
3306                                 return 0;
3307                 }
3308                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
3309                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3310                                "Inode bitmap for group %u not in group "
3311                                "(block %llu)!", i, inode_bitmap);
3312                         return 0;
3313                 }
3314                 inode_table = ext4_inode_table(sb, gdp);
3315                 if (inode_table == sb_block) {
3316                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3317                                  "Inode table for group %u overlaps "
3318                                  "superblock", i);
3319                         if (!sb_rdonly(sb))
3320                                 return 0;
3321                 }
3322                 if (inode_table >= sb_block + 1 &&
3323                     inode_table <= last_bg_block) {
3324                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3325                                  "Inode table for group %u overlaps "
3326                                  "block group descriptors", i);
3327                         if (!sb_rdonly(sb))
3328                                 return 0;
3329                 }
3330                 if (inode_table < first_block ||
3331                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
3332                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3333                                "Inode table for group %u not in group "
3334                                "(block %llu)!", i, inode_table);
3335                         return 0;
3336                 }
3337                 ext4_lock_group(sb, i);
3338                 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3339                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3340                                  "Checksum for group %u failed (%u!=%u)",
3341                                  i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3342                                      gdp)), le16_to_cpu(gdp->bg_checksum));
3343                         if (!sb_rdonly(sb)) {
3344                                 ext4_unlock_group(sb, i);
3345                                 return 0;
3346                         }
3347                 }
3348                 ext4_unlock_group(sb, i);
3349                 if (!flexbg_flag)
3350                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
3351         }
3352         if (NULL != first_not_zeroed)
3353                 *first_not_zeroed = grp;
3354         return 1;
3355 }
3356
3357 /*
3358  * Maximal extent format file size.
3359  * Resulting logical blkno at s_maxbytes must fit in our on-disk
3360  * extent format containers, within a sector_t, and within i_blocks
3361  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
3362  * so that won't be a limiting factor.
3363  *
3364  * However there is other limiting factor. We do store extents in the form
3365  * of starting block and length, hence the resulting length of the extent
3366  * covering maximum file size must fit into on-disk format containers as
3367  * well. Given that length is always by 1 unit bigger than max unit (because
3368  * we count 0 as well) we have to lower the s_maxbytes by one fs block.
3369  *
3370  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3371  */
3372 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3373 {
3374         loff_t res;
3375         loff_t upper_limit = MAX_LFS_FILESIZE;
3376
3377         BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3378
3379         if (!has_huge_files) {
3380                 upper_limit = (1LL << 32) - 1;
3381
3382                 /* total blocks in file system block size */
3383                 upper_limit >>= (blkbits - 9);
3384                 upper_limit <<= blkbits;
3385         }
3386
3387         /*
3388          * 32-bit extent-start container, ee_block. We lower the maxbytes
3389          * by one fs block, so ee_len can cover the extent of maximum file
3390          * size
3391          */
3392         res = (1LL << 32) - 1;
3393         res <<= blkbits;
3394
3395         /* Sanity check against vm- & vfs- imposed limits */
3396         if (res > upper_limit)
3397                 res = upper_limit;
3398
3399         return res;
3400 }
3401
3402 /*
3403  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
3404  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3405  * We need to be 1 filesystem block less than the 2^48 sector limit.
3406  */
3407 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3408 {
3409         loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3410         int meta_blocks;
3411         unsigned int ppb = 1 << (bits - 2);
3412
3413         /*
3414          * This is calculated to be the largest file size for a dense, block
3415          * mapped file such that the file's total number of 512-byte sectors,
3416          * including data and all indirect blocks, does not exceed (2^48 - 1).
3417          *
3418          * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
3419          * number of 512-byte sectors of the file.
3420          */
3421         if (!has_huge_files) {
3422                 /*
3423                  * !has_huge_files or implies that the inode i_block field
3424                  * represents total file blocks in 2^32 512-byte sectors ==
3425                  * size of vfs inode i_blocks * 8
3426                  */
3427                 upper_limit = (1LL << 32) - 1;
3428
3429                 /* total blocks in file system block size */
3430                 upper_limit >>= (bits - 9);
3431
3432         } else {
3433                 /*
3434                  * We use 48 bit ext4_inode i_blocks
3435                  * With EXT4_HUGE_FILE_FL set the i_blocks
3436                  * represent total number of blocks in
3437                  * file system block size
3438                  */
3439                 upper_limit = (1LL << 48) - 1;
3440
3441         }
3442
3443         /* Compute how many blocks we can address by block tree */
3444         res += ppb;
3445         res += ppb * ppb;
3446         res += ((loff_t)ppb) * ppb * ppb;
3447         /* Compute how many metadata blocks are needed */
3448         meta_blocks = 1;
3449         meta_blocks += 1 + ppb;
3450         meta_blocks += 1 + ppb + ppb * ppb;
3451         /* Does block tree limit file size? */
3452         if (res + meta_blocks <= upper_limit)
3453                 goto check_lfs;
3454
3455         res = upper_limit;
3456         /* How many metadata blocks are needed for addressing upper_limit? */
3457         upper_limit -= EXT4_NDIR_BLOCKS;
3458         /* indirect blocks */
3459         meta_blocks = 1;
3460         upper_limit -= ppb;
3461         /* double indirect blocks */
3462         if (upper_limit < ppb * ppb) {
3463                 meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3464                 res -= meta_blocks;
3465                 goto check_lfs;
3466         }
3467         meta_blocks += 1 + ppb;
3468         upper_limit -= ppb * ppb;
3469         /* tripple indirect blocks for the rest */
3470         meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3471                 DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3472         res -= meta_blocks;
3473 check_lfs:
3474         res <<= bits;
3475         if (res > MAX_LFS_FILESIZE)
3476                 res = MAX_LFS_FILESIZE;
3477
3478         return res;
3479 }
3480
3481 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3482                                    ext4_fsblk_t logical_sb_block, int nr)
3483 {
3484         struct ext4_sb_info *sbi = EXT4_SB(sb);
3485         ext4_group_t bg, first_meta_bg;
3486         int has_super = 0;
3487
3488         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3489
3490         if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3491                 return logical_sb_block + nr + 1;
3492         bg = sbi->s_desc_per_block * nr;
3493         if (ext4_bg_has_super(sb, bg))
3494                 has_super = 1;
3495
3496         /*
3497          * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3498          * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3499          * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3500          * compensate.
3501          */
3502         if (sb->s_blocksize == 1024 && nr == 0 &&
3503             le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3504                 has_super++;
3505
3506         return (has_super + ext4_group_first_block_no(sb, bg));
3507 }
3508
3509 /**
3510  * ext4_get_stripe_size: Get the stripe size.
3511  * @sbi: In memory super block info
3512  *
3513  * If we have specified it via mount option, then
3514  * use the mount option value. If the value specified at mount time is
3515  * greater than the blocks per group use the super block value.
3516  * If the super block value is greater than blocks per group return 0.
3517  * Allocator needs it be less than blocks per group.
3518  *
3519  */
3520 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3521 {
3522         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3523         unsigned long stripe_width =
3524                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3525         int ret;
3526
3527         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3528                 ret = sbi->s_stripe;
3529         else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3530                 ret = stripe_width;
3531         else if (stride && stride <= sbi->s_blocks_per_group)
3532                 ret = stride;
3533         else
3534                 ret = 0;
3535
3536         /*
3537          * If the stripe width is 1, this makes no sense and
3538          * we set it to 0 to turn off stripe handling code.
3539          */
3540         if (ret <= 1)
3541                 ret = 0;
3542
3543         return ret;
3544 }
3545
3546 /*
3547  * Check whether this filesystem can be mounted based on
3548  * the features present and the RDONLY/RDWR mount requested.
3549  * Returns 1 if this filesystem can be mounted as requested,
3550  * 0 if it cannot be.
3551  */
3552 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3553 {
3554         if (ext4_has_unknown_ext4_incompat_features(sb)) {
3555                 ext4_msg(sb, KERN_ERR,
3556                         "Couldn't mount because of "
3557                         "unsupported optional features (%x)",
3558                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3559                         ~EXT4_FEATURE_INCOMPAT_SUPP));
3560                 return 0;
3561         }
3562
3563 #if !IS_ENABLED(CONFIG_UNICODE)
3564         if (ext4_has_feature_casefold(sb)) {
3565                 ext4_msg(sb, KERN_ERR,
3566                          "Filesystem with casefold feature cannot be "
3567                          "mounted without CONFIG_UNICODE");
3568                 return 0;
3569         }
3570 #endif
3571
3572         if (readonly)
3573                 return 1;
3574
3575         if (ext4_has_feature_readonly(sb)) {
3576                 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3577                 sb->s_flags |= SB_RDONLY;
3578                 return 1;
3579         }
3580
3581         /* Check that feature set is OK for a read-write mount */
3582         if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3583                 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3584                          "unsupported optional features (%x)",
3585                          (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3586                                 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3587                 return 0;
3588         }
3589         if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3590                 ext4_msg(sb, KERN_ERR,
3591                          "Can't support bigalloc feature without "
3592                          "extents feature\n");
3593                 return 0;
3594         }
3595
3596 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3597         if (!readonly && (ext4_has_feature_quota(sb) ||
3598                           ext4_has_feature_project(sb))) {
3599                 ext4_msg(sb, KERN_ERR,
3600                          "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3601                 return 0;
3602         }
3603 #endif  /* CONFIG_QUOTA */
3604         return 1;
3605 }
3606
3607 /*
3608  * This function is called once a day if we have errors logged
3609  * on the file system
3610  */
3611 static void print_daily_error_info(struct timer_list *t)
3612 {
3613         struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3614         struct super_block *sb = sbi->s_sb;
3615         struct ext4_super_block *es = sbi->s_es;
3616
3617         if (es->s_error_count)
3618                 /* fsck newer than v1.41.13 is needed to clean this condition. */
3619                 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3620                          le32_to_cpu(es->s_error_count));
3621         if (es->s_first_error_time) {
3622                 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3623                        sb->s_id,
3624                        ext4_get_tstamp(es, s_first_error_time),
3625                        (int) sizeof(es->s_first_error_func),
3626                        es->s_first_error_func,
3627                        le32_to_cpu(es->s_first_error_line));
3628                 if (es->s_first_error_ino)
3629                         printk(KERN_CONT ": inode %u",
3630                                le32_to_cpu(es->s_first_error_ino));
3631                 if (es->s_first_error_block)
3632                         printk(KERN_CONT ": block %llu", (unsigned long long)
3633                                le64_to_cpu(es->s_first_error_block));
3634                 printk(KERN_CONT "\n");
3635         }
3636         if (es->s_last_error_time) {
3637                 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3638                        sb->s_id,
3639                        ext4_get_tstamp(es, s_last_error_time),
3640                        (int) sizeof(es->s_last_error_func),
3641                        es->s_last_error_func,
3642                        le32_to_cpu(es->s_last_error_line));
3643                 if (es->s_last_error_ino)
3644                         printk(KERN_CONT ": inode %u",
3645                                le32_to_cpu(es->s_last_error_ino));
3646                 if (es->s_last_error_block)
3647                         printk(KERN_CONT ": block %llu", (unsigned long long)
3648                                le64_to_cpu(es->s_last_error_block));
3649                 printk(KERN_CONT "\n");
3650         }
3651         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3652 }
3653
3654 /* Find next suitable group and run ext4_init_inode_table */
3655 static int ext4_run_li_request(struct ext4_li_request *elr)
3656 {
3657         struct ext4_group_desc *gdp = NULL;
3658         struct super_block *sb = elr->lr_super;
3659         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3660         ext4_group_t group = elr->lr_next_group;
3661         unsigned int prefetch_ios = 0;
3662         int ret = 0;
3663         u64 start_time;
3664
3665         if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3666                 elr->lr_next_group = ext4_mb_prefetch(sb, group,
3667                                 EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
3668                 if (prefetch_ios)
3669                         ext4_mb_prefetch_fini(sb, elr->lr_next_group,
3670                                               prefetch_ios);
3671                 trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
3672                                             prefetch_ios);
3673                 if (group >= elr->lr_next_group) {
3674                         ret = 1;
3675                         if (elr->lr_first_not_zeroed != ngroups &&
3676                             !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3677                                 elr->lr_next_group = elr->lr_first_not_zeroed;
3678                                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3679                                 ret = 0;
3680                         }
3681                 }
3682                 return ret;
3683         }
3684
3685         for (; group < ngroups; group++) {
3686                 gdp = ext4_get_group_desc(sb, group, NULL);
3687                 if (!gdp) {
3688                         ret = 1;
3689                         break;
3690                 }
3691
3692                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3693                         break;
3694         }
3695
3696         if (group >= ngroups)
3697                 ret = 1;
3698
3699         if (!ret) {
3700                 start_time = ktime_get_real_ns();
3701                 ret = ext4_init_inode_table(sb, group,
3702                                             elr->lr_timeout ? 0 : 1);
3703                 trace_ext4_lazy_itable_init(sb, group);
3704                 if (elr->lr_timeout == 0) {
3705                         elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3706                                 EXT4_SB(elr->lr_super)->s_li_wait_mult);
3707                 }
3708                 elr->lr_next_sched = jiffies + elr->lr_timeout;
3709                 elr->lr_next_group = group + 1;
3710         }
3711         return ret;
3712 }
3713
3714 /*
3715  * Remove lr_request from the list_request and free the
3716  * request structure. Should be called with li_list_mtx held
3717  */
3718 static void ext4_remove_li_request(struct ext4_li_request *elr)
3719 {
3720         if (!elr)
3721                 return;
3722
3723         list_del(&elr->lr_request);
3724         EXT4_SB(elr->lr_super)->s_li_request = NULL;
3725         kfree(elr);
3726 }
3727
3728 static void ext4_unregister_li_request(struct super_block *sb)
3729 {
3730         mutex_lock(&ext4_li_mtx);
3731         if (!ext4_li_info) {
3732                 mutex_unlock(&ext4_li_mtx);
3733                 return;
3734         }
3735
3736         mutex_lock(&ext4_li_info->li_list_mtx);
3737         ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3738         mutex_unlock(&ext4_li_info->li_list_mtx);
3739         mutex_unlock(&ext4_li_mtx);
3740 }
3741
3742 static struct task_struct *ext4_lazyinit_task;
3743
3744 /*
3745  * This is the function where ext4lazyinit thread lives. It walks
3746  * through the request list searching for next scheduled filesystem.
3747  * When such a fs is found, run the lazy initialization request
3748  * (ext4_rn_li_request) and keep track of the time spend in this
3749  * function. Based on that time we compute next schedule time of
3750  * the request. When walking through the list is complete, compute
3751  * next waking time and put itself into sleep.
3752  */
3753 static int ext4_lazyinit_thread(void *arg)
3754 {
3755         struct ext4_lazy_init *eli = arg;
3756         struct list_head *pos, *n;
3757         struct ext4_li_request *elr;
3758         unsigned long next_wakeup, cur;
3759
3760         BUG_ON(NULL == eli);
3761
3762 cont_thread:
3763         while (true) {
3764                 next_wakeup = MAX_JIFFY_OFFSET;
3765
3766                 mutex_lock(&eli->li_list_mtx);
3767                 if (list_empty(&eli->li_request_list)) {
3768                         mutex_unlock(&eli->li_list_mtx);
3769                         goto exit_thread;
3770                 }
3771                 list_for_each_safe(pos, n, &eli->li_request_list) {
3772                         int err = 0;
3773                         int progress = 0;
3774                         elr = list_entry(pos, struct ext4_li_request,
3775                                          lr_request);
3776
3777                         if (time_before(jiffies, elr->lr_next_sched)) {
3778                                 if (time_before(elr->lr_next_sched, next_wakeup))
3779                                         next_wakeup = elr->lr_next_sched;
3780                                 continue;
3781                         }
3782                         if (down_read_trylock(&elr->lr_super->s_umount)) {
3783                                 if (sb_start_write_trylock(elr->lr_super)) {
3784                                         progress = 1;
3785                                         /*
3786                                          * We hold sb->s_umount, sb can not
3787                                          * be removed from the list, it is
3788                                          * now safe to drop li_list_mtx
3789                                          */
3790                                         mutex_unlock(&eli->li_list_mtx);
3791                                         err = ext4_run_li_request(elr);
3792                                         sb_end_write(elr->lr_super);
3793                                         mutex_lock(&eli->li_list_mtx);
3794                                         n = pos->next;
3795                                 }
3796                                 up_read((&elr->lr_super->s_umount));
3797                         }
3798                         /* error, remove the lazy_init job */
3799                         if (err) {
3800                                 ext4_remove_li_request(elr);
3801                                 continue;
3802                         }
3803                         if (!progress) {
3804                                 elr->lr_next_sched = jiffies +
3805                                         (prandom_u32()
3806                                          % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3807                         }
3808                         if (time_before(elr->lr_next_sched, next_wakeup))
3809                                 next_wakeup = elr->lr_next_sched;
3810                 }
3811                 mutex_unlock(&eli->li_list_mtx);
3812
3813                 try_to_freeze();
3814
3815                 cur = jiffies;
3816                 if ((time_after_eq(cur, next_wakeup)) ||
3817                     (MAX_JIFFY_OFFSET == next_wakeup)) {
3818                         cond_resched();
3819                         continue;
3820                 }
3821
3822                 schedule_timeout_interruptible(next_wakeup - cur);
3823
3824                 if (kthread_should_stop()) {
3825                         ext4_clear_request_list();
3826                         goto exit_thread;
3827                 }
3828         }
3829
3830 exit_thread:
3831         /*
3832          * It looks like the request list is empty, but we need
3833          * to check it under the li_list_mtx lock, to prevent any
3834          * additions into it, and of course we should lock ext4_li_mtx
3835          * to atomically free the list and ext4_li_info, because at
3836          * this point another ext4 filesystem could be registering
3837          * new one.
3838          */
3839         mutex_lock(&ext4_li_mtx);
3840         mutex_lock(&eli->li_list_mtx);
3841         if (!list_empty(&eli->li_request_list)) {
3842                 mutex_unlock(&eli->li_list_mtx);
3843                 mutex_unlock(&ext4_li_mtx);
3844                 goto cont_thread;
3845         }
3846         mutex_unlock(&eli->li_list_mtx);
3847         kfree(ext4_li_info);
3848         ext4_li_info = NULL;
3849         mutex_unlock(&ext4_li_mtx);
3850
3851         return 0;
3852 }
3853
3854 static void ext4_clear_request_list(void)
3855 {
3856         struct list_head *pos, *n;
3857         struct ext4_li_request *elr;
3858
3859         mutex_lock(&ext4_li_info->li_list_mtx);
3860         list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3861                 elr = list_entry(pos, struct ext4_li_request,
3862                                  lr_request);
3863                 ext4_remove_li_request(elr);
3864         }
3865         mutex_unlock(&ext4_li_info->li_list_mtx);
3866 }
3867
3868 static int ext4_run_lazyinit_thread(void)
3869 {
3870         ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3871                                          ext4_li_info, "ext4lazyinit");
3872         if (IS_ERR(ext4_lazyinit_task)) {
3873                 int err = PTR_ERR(ext4_lazyinit_task);
3874                 ext4_clear_request_list();
3875                 kfree(ext4_li_info);
3876                 ext4_li_info = NULL;
3877                 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3878                                  "initialization thread\n",
3879                                  err);
3880                 return err;
3881         }
3882         ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3883         return 0;
3884 }
3885
3886 /*
3887  * Check whether it make sense to run itable init. thread or not.
3888  * If there is at least one uninitialized inode table, return
3889  * corresponding group number, else the loop goes through all
3890  * groups and return total number of groups.
3891  */
3892 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3893 {
3894         ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3895         struct ext4_group_desc *gdp = NULL;
3896
3897         if (!ext4_has_group_desc_csum(sb))
3898                 return ngroups;
3899
3900         for (group = 0; group < ngroups; group++) {
3901                 gdp = ext4_get_group_desc(sb, group, NULL);
3902                 if (!gdp)
3903                         continue;
3904
3905                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3906                         break;
3907         }
3908
3909         return group;
3910 }
3911
3912 static int ext4_li_info_new(void)
3913 {
3914         struct ext4_lazy_init *eli = NULL;
3915
3916         eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3917         if (!eli)
3918                 return -ENOMEM;
3919
3920         INIT_LIST_HEAD(&eli->li_request_list);
3921         mutex_init(&eli->li_list_mtx);
3922
3923         eli->li_state |= EXT4_LAZYINIT_QUIT;
3924
3925         ext4_li_info = eli;
3926
3927         return 0;
3928 }
3929
3930 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3931                                             ext4_group_t start)
3932 {
3933         struct ext4_li_request *elr;
3934
3935         elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3936         if (!elr)
3937                 return NULL;
3938
3939         elr->lr_super = sb;
3940         elr->lr_first_not_zeroed = start;
3941         if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3942                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3943                 elr->lr_next_group = start;
3944         } else {
3945                 elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3946         }
3947
3948         /*
3949          * Randomize first schedule time of the request to
3950          * spread the inode table initialization requests
3951          * better.
3952          */
3953         elr->lr_next_sched = jiffies + (prandom_u32() %
3954                                 (EXT4_DEF_LI_MAX_START_DELAY * HZ));
3955         return elr;
3956 }
3957
3958 int ext4_register_li_request(struct super_block *sb,
3959                              ext4_group_t first_not_zeroed)
3960 {
3961         struct ext4_sb_info *sbi = EXT4_SB(sb);
3962         struct ext4_li_request *elr = NULL;
3963         ext4_group_t ngroups = sbi->s_groups_count;
3964         int ret = 0;
3965
3966         mutex_lock(&ext4_li_mtx);
3967         if (sbi->s_li_request != NULL) {
3968                 /*
3969                  * Reset timeout so it can be computed again, because
3970                  * s_li_wait_mult might have changed.
3971                  */
3972                 sbi->s_li_request->lr_timeout = 0;
3973                 goto out;
3974         }
3975
3976         if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3977             (first_not_zeroed == ngroups || sb_rdonly(sb) ||
3978              !test_opt(sb, INIT_INODE_TABLE)))
3979                 goto out;
3980
3981         elr = ext4_li_request_new(sb, first_not_zeroed);
3982         if (!elr) {
3983                 ret = -ENOMEM;
3984                 goto out;
3985         }
3986
3987         if (NULL == ext4_li_info) {
3988                 ret = ext4_li_info_new();
3989                 if (ret)
3990                         goto out;
3991         }
3992
3993         mutex_lock(&ext4_li_info->li_list_mtx);
3994         list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3995         mutex_unlock(&ext4_li_info->li_list_mtx);
3996
3997         sbi->s_li_request = elr;
3998         /*
3999          * set elr to NULL here since it has been inserted to
4000          * the request_list and the removal and free of it is
4001          * handled by ext4_clear_request_list from now on.
4002          */
4003         elr = NULL;
4004
4005         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4006                 ret = ext4_run_lazyinit_thread();
4007                 if (ret)
4008                         goto out;
4009         }
4010 out:
4011         mutex_unlock(&ext4_li_mtx);
4012         if (ret)
4013                 kfree(elr);
4014         return ret;
4015 }
4016
4017 /*
4018  * We do not need to lock anything since this is called on
4019  * module unload.
4020  */
4021 static void ext4_destroy_lazyinit_thread(void)
4022 {
4023         /*
4024          * If thread exited earlier
4025          * there's nothing to be done.
4026          */
4027         if (!ext4_li_info || !ext4_lazyinit_task)
4028                 return;
4029
4030         kthread_stop(ext4_lazyinit_task);
4031 }
4032
4033 static int set_journal_csum_feature_set(struct super_block *sb)
4034 {
4035         int ret = 1;
4036         int compat, incompat;
4037         struct ext4_sb_info *sbi = EXT4_SB(sb);
4038
4039         if (ext4_has_metadata_csum(sb)) {
4040                 /* journal checksum v3 */
4041                 compat = 0;
4042                 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4043         } else {
4044                 /* journal checksum v1 */
4045                 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4046                 incompat = 0;
4047         }
4048
4049         jbd2_journal_clear_features(sbi->s_journal,
4050                         JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4051                         JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4052                         JBD2_FEATURE_INCOMPAT_CSUM_V2);
4053         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4054                 ret = jbd2_journal_set_features(sbi->s_journal,
4055                                 compat, 0,
4056                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4057                                 incompat);
4058         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4059                 ret = jbd2_journal_set_features(sbi->s_journal,
4060                                 compat, 0,
4061                                 incompat);
4062                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4063                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4064         } else {
4065                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4066                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4067         }
4068
4069         return ret;
4070 }
4071
4072 /*
4073  * Note: calculating the overhead so we can be compatible with
4074  * historical BSD practice is quite difficult in the face of
4075  * clusters/bigalloc.  This is because multiple metadata blocks from
4076  * different block group can end up in the same allocation cluster.
4077  * Calculating the exact overhead in the face of clustered allocation
4078  * requires either O(all block bitmaps) in memory or O(number of block
4079  * groups**2) in time.  We will still calculate the superblock for
4080  * older file systems --- and if we come across with a bigalloc file
4081  * system with zero in s_overhead_clusters the estimate will be close to
4082  * correct especially for very large cluster sizes --- but for newer
4083  * file systems, it's better to calculate this figure once at mkfs
4084  * time, and store it in the superblock.  If the superblock value is
4085  * present (even for non-bigalloc file systems), we will use it.
4086  */
4087 static int count_overhead(struct super_block *sb, ext4_group_t grp,
4088                           char *buf)
4089 {
4090         struct ext4_sb_info     *sbi = EXT4_SB(sb);
4091         struct ext4_group_desc  *gdp;
4092         ext4_fsblk_t            first_block, last_block, b;
4093         ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
4094         int                     s, j, count = 0;
4095         int                     has_super = ext4_bg_has_super(sb, grp);
4096
4097         if (!ext4_has_feature_bigalloc(sb))
4098                 return (has_super + ext4_bg_num_gdb(sb, grp) +
4099                         (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4100                         sbi->s_itb_per_group + 2);
4101
4102         first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4103                 (grp * EXT4_BLOCKS_PER_GROUP(sb));
4104         last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4105         for (i = 0; i < ngroups; i++) {
4106                 gdp = ext4_get_group_desc(sb, i, NULL);
4107                 b = ext4_block_bitmap(sb, gdp);
4108                 if (b >= first_block && b <= last_block) {
4109                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4110                         count++;
4111                 }
4112                 b = ext4_inode_bitmap(sb, gdp);
4113                 if (b >= first_block && b <= last_block) {
4114                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4115                         count++;
4116                 }
4117                 b = ext4_inode_table(sb, gdp);
4118                 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4119                         for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4120                                 int c = EXT4_B2C(sbi, b - first_block);
4121                                 ext4_set_bit(c, buf);
4122                                 count++;
4123                         }
4124                 if (i != grp)
4125                         continue;
4126                 s = 0;
4127                 if (ext4_bg_has_super(sb, grp)) {
4128                         ext4_set_bit(s++, buf);
4129                         count++;
4130                 }
4131                 j = ext4_bg_num_gdb(sb, grp);
4132                 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4133                         ext4_error(sb, "Invalid number of block group "
4134                                    "descriptor blocks: %d", j);
4135                         j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4136                 }
4137                 count += j;
4138                 for (; j > 0; j--)
4139                         ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4140         }
4141         if (!count)
4142                 return 0;
4143         return EXT4_CLUSTERS_PER_GROUP(sb) -
4144                 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4145 }
4146
4147 /*
4148  * Compute the overhead and stash it in sbi->s_overhead
4149  */
4150 int ext4_calculate_overhead(struct super_block *sb)
4151 {
4152         struct ext4_sb_info *sbi = EXT4_SB(sb);
4153         struct ext4_super_block *es = sbi->s_es;
4154         struct inode *j_inode;
4155         unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4156         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4157         ext4_fsblk_t overhead = 0;
4158         char *buf = (char *) get_zeroed_page(GFP_NOFS);
4159
4160         if (!buf)
4161                 return -ENOMEM;
4162
4163         /*
4164          * Compute the overhead (FS structures).  This is constant
4165          * for a given filesystem unless the number of block groups
4166          * changes so we cache the previous value until it does.
4167          */
4168
4169         /*
4170          * All of the blocks before first_data_block are overhead
4171          */
4172         overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4173
4174         /*
4175          * Add the overhead found in each block group
4176          */
4177         for (i = 0; i < ngroups; i++) {
4178                 int blks;
4179
4180                 blks = count_overhead(sb, i, buf);
4181                 overhead += blks;
4182                 if (blks)
4183                         memset(buf, 0, PAGE_SIZE);
4184                 cond_resched();
4185         }
4186
4187         /*
4188          * Add the internal journal blocks whether the journal has been
4189          * loaded or not
4190          */
4191         if (sbi->s_journal && !sbi->s_journal_bdev)
4192                 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4193         else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4194                 /* j_inum for internal journal is non-zero */
4195                 j_inode = ext4_get_journal_inode(sb, j_inum);
4196                 if (j_inode) {
4197                         j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4198                         overhead += EXT4_NUM_B2C(sbi, j_blocks);
4199                         iput(j_inode);
4200                 } else {
4201                         ext4_msg(sb, KERN_ERR, "can't get journal size");
4202                 }
4203         }
4204         sbi->s_overhead = overhead;
4205         smp_wmb();
4206         free_page((unsigned long) buf);
4207         return 0;
4208 }
4209
4210 static void ext4_set_resv_clusters(struct super_block *sb)
4211 {
4212         ext4_fsblk_t resv_clusters;
4213         struct ext4_sb_info *sbi = EXT4_SB(sb);
4214
4215         /*
4216          * There's no need to reserve anything when we aren't using extents.
4217          * The space estimates are exact, there are no unwritten extents,
4218          * hole punching doesn't need new metadata... This is needed especially
4219          * to keep ext2/3 backward compatibility.
4220          */
4221         if (!ext4_has_feature_extents(sb))
4222                 return;
4223         /*
4224          * By default we reserve 2% or 4096 clusters, whichever is smaller.
4225          * This should cover the situations where we can not afford to run
4226          * out of space like for example punch hole, or converting
4227          * unwritten extents in delalloc path. In most cases such
4228          * allocation would require 1, or 2 blocks, higher numbers are
4229          * very rare.
4230          */
4231         resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4232                          sbi->s_cluster_bits);
4233
4234         do_div(resv_clusters, 50);
4235         resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4236
4237         atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4238 }
4239
/*
 * Return a short string naming the quota mode of @sb: "none",
 * "journalled" or "writeback" when quota support is compiled in,
 * "disabled" otherwise.
 */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	if (!ext4_quota_capable(sb))
		return "none";

	return (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) ?
		"journalled" : "writeback";
#else
	return "disabled";
#endif
}
4254
4255 static void ext4_setup_csum_trigger(struct super_block *sb,
4256                                     enum ext4_journal_trigger_type type,
4257                                     void (*trigger)(
4258                                         struct jbd2_buffer_trigger_type *type,
4259                                         struct buffer_head *bh,
4260                                         void *mapped_data,
4261                                         size_t size))
4262 {
4263         struct ext4_sb_info *sbi = EXT4_SB(sb);
4264
4265         sbi->s_journal_triggers[type].sb = sb;
4266         sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4267 }
4268
4269 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4270 {
4271         if (!sbi)
4272                 return;
4273
4274         kfree(sbi->s_blockgroup_lock);
4275         fs_put_dax(sbi->s_daxdev, NULL);
4276         kfree(sbi);
4277 }
4278
4279 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4280 {
4281         struct ext4_sb_info *sbi;
4282
4283         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4284         if (!sbi)
4285                 return NULL;
4286
4287         sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4288                                            NULL, NULL);
4289
4290         sbi->s_blockgroup_lock =
4291                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4292
4293         if (!sbi->s_blockgroup_lock)
4294                 goto err_out;
4295
4296         sb->s_fs_info = sbi;
4297         sbi->s_sb = sb;
4298         return sbi;
4299 err_out:
4300         fs_put_dax(sbi->s_daxdev, NULL);
4301         kfree(sbi);
4302         return NULL;
4303 }
4304
4305 static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
4306 {
4307         struct buffer_head *bh, **group_desc;
4308         struct ext4_super_block *es = NULL;
4309         struct ext4_sb_info *sbi = EXT4_SB(sb);
4310         struct flex_groups **flex_groups;
4311         ext4_fsblk_t block;
4312         ext4_fsblk_t logical_sb_block;
4313         unsigned long offset = 0;
4314         unsigned long def_mount_opts;
4315         struct inode *root;
4316         int ret = -ENOMEM;
4317         int blocksize, clustersize;
4318         unsigned int db_count;
4319         unsigned int i;
4320         int needs_recovery, has_huge_files;
4321         __u64 blocks_count;
4322         int err = 0;
4323         ext4_group_t first_not_zeroed;
4324         struct ext4_fs_context *ctx = fc->fs_private;
4325         int silent = fc->sb_flags & SB_SILENT;
4326
4327         /* Set defaults for the variables that will be set during parsing */
4328         if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
4329                 ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4330
4331         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
4332         sbi->s_sectors_written_start =
4333                 part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
4334
4335         /* -EINVAL is default */
4336         ret = -EINVAL;
4337         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
4338         if (!blocksize) {
4339                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
4340                 goto out_fail;
4341         }
4342
4343         /*
4344          * The ext4 superblock will not be buffer aligned for other than 1kB
4345          * block sizes.  We need to calculate the offset from buffer start.
4346          */
4347         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
4348                 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
4349                 offset = do_div(logical_sb_block, blocksize);
4350         } else {
4351                 logical_sb_block = sbi->s_sb_block;
4352         }
4353
4354         bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4355         if (IS_ERR(bh)) {
4356                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
4357                 ret = PTR_ERR(bh);
4358                 goto out_fail;
4359         }
4360         /*
4361          * Note: s_es must be initialized as soon as possible because
4362          *       some ext4 macro-instructions depend on its value
4363          */
4364         es = (struct ext4_super_block *) (bh->b_data + offset);
4365         sbi->s_es = es;
4366         sb->s_magic = le16_to_cpu(es->s_magic);
4367         if (sb->s_magic != EXT4_SUPER_MAGIC)
4368                 goto cantfind_ext4;
4369         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
4370
4371         /* Warn if metadata_csum and gdt_csum are both set. */
4372         if (ext4_has_feature_metadata_csum(sb) &&
4373             ext4_has_feature_gdt_csum(sb))
4374                 ext4_warning(sb, "metadata_csum and uninit_bg are "
4375                              "redundant flags; please run fsck.");
4376
4377         /* Check for a known checksum algorithm */
4378         if (!ext4_verify_csum_type(sb, es)) {
4379                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4380                          "unknown checksum algorithm.");
4381                 silent = 1;
4382                 goto cantfind_ext4;
4383         }
4384         ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4385                                 ext4_orphan_file_block_trigger);
4386
4387         /* Load the checksum driver */
4388         sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4389         if (IS_ERR(sbi->s_chksum_driver)) {
4390                 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4391                 ret = PTR_ERR(sbi->s_chksum_driver);
4392                 sbi->s_chksum_driver = NULL;
4393                 goto failed_mount;
4394         }
4395
4396         /* Check superblock checksum */
4397         if (!ext4_superblock_csum_verify(sb, es)) {
4398                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4399                          "invalid superblock checksum.  Run e2fsck?");
4400                 silent = 1;
4401                 ret = -EFSBADCRC;
4402                 goto cantfind_ext4;
4403         }
4404
4405         /* Precompute checksum seed for all metadata */
4406         if (ext4_has_feature_csum_seed(sb))
4407                 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4408         else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4409                 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4410                                                sizeof(es->s_uuid));
4411
4412         /* Set defaults before we parse the mount options */
4413         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4414         set_opt(sb, INIT_INODE_TABLE);
4415         if (def_mount_opts & EXT4_DEFM_DEBUG)
4416                 set_opt(sb, DEBUG);
4417         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4418                 set_opt(sb, GRPID);
4419         if (def_mount_opts & EXT4_DEFM_UID16)
4420                 set_opt(sb, NO_UID32);
4421         /* xattr user namespace & acls are now defaulted on */
4422         set_opt(sb, XATTR_USER);
4423 #ifdef CONFIG_EXT4_FS_POSIX_ACL
4424         set_opt(sb, POSIX_ACL);
4425 #endif
4426         if (ext4_has_feature_fast_commit(sb))
4427                 set_opt2(sb, JOURNAL_FAST_COMMIT);
4428         /* don't forget to enable journal_csum when metadata_csum is enabled. */
4429         if (ext4_has_metadata_csum(sb))
4430                 set_opt(sb, JOURNAL_CHECKSUM);
4431
4432         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4433                 set_opt(sb, JOURNAL_DATA);
4434         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4435                 set_opt(sb, ORDERED_DATA);
4436         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4437                 set_opt(sb, WRITEBACK_DATA);
4438
4439         if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
4440                 set_opt(sb, ERRORS_PANIC);
4441         else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
4442                 set_opt(sb, ERRORS_CONT);
4443         else
4444                 set_opt(sb, ERRORS_RO);
4445         /* block_validity enabled by default; disable with noblock_validity */
4446         set_opt(sb, BLOCK_VALIDITY);
4447         if (def_mount_opts & EXT4_DEFM_DISCARD)
4448                 set_opt(sb, DISCARD);
4449
4450         sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
4451         sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
4452         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
4453         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
4454         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
4455
4456         if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4457                 set_opt(sb, BARRIER);
4458
4459         /*
4460          * enable delayed allocation by default
4461          * Use -o nodelalloc to turn it off
4462          */
4463         if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4464             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4465                 set_opt(sb, DELALLOC);
4466
4467         /*
4468          * set default s_li_wait_mult for lazyinit, for the case there is
4469          * no mount option specified.
4470          */
4471         sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
4472
4473         if (le32_to_cpu(es->s_log_block_size) >
4474             (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4475                 ext4_msg(sb, KERN_ERR,
4476                          "Invalid log block size: %u",
4477                          le32_to_cpu(es->s_log_block_size));
4478                 goto failed_mount;
4479         }
4480         if (le32_to_cpu(es->s_log_cluster_size) >
4481             (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4482                 ext4_msg(sb, KERN_ERR,
4483                          "Invalid log cluster size: %u",
4484                          le32_to_cpu(es->s_log_cluster_size));
4485                 goto failed_mount;
4486         }
4487
4488         blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
4489
4490         if (blocksize == PAGE_SIZE)
4491                 set_opt(sb, DIOREAD_NOLOCK);
4492
4493         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4494                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4495                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4496         } else {
4497                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4498                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4499                 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4500                         ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4501                                  sbi->s_first_ino);
4502                         goto failed_mount;
4503                 }
4504                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4505                     (!is_power_of_2(sbi->s_inode_size)) ||
4506                     (sbi->s_inode_size > blocksize)) {
4507                         ext4_msg(sb, KERN_ERR,
4508                                "unsupported inode size: %d",
4509                                sbi->s_inode_size);
4510                         ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
4511                         goto failed_mount;
4512                 }
4513                 /*
4514                  * i_atime_extra is the last extra field available for
4515                  * [acm]times in struct ext4_inode. Checking for that
4516                  * field should suffice to ensure we have extra space
4517                  * for all three.
4518                  */
4519                 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4520                         sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4521                         sb->s_time_gran = 1;
4522                         sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4523                 } else {
4524                         sb->s_time_gran = NSEC_PER_SEC;
4525                         sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4526                 }
4527                 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4528         }
4529         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4530                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4531                         EXT4_GOOD_OLD_INODE_SIZE;
4532                 if (ext4_has_feature_extra_isize(sb)) {
4533                         unsigned v, max = (sbi->s_inode_size -
4534                                            EXT4_GOOD_OLD_INODE_SIZE);
4535
4536                         v = le16_to_cpu(es->s_want_extra_isize);
4537                         if (v > max) {
4538                                 ext4_msg(sb, KERN_ERR,
4539                                          "bad s_want_extra_isize: %d", v);
4540                                 goto failed_mount;
4541                         }
4542                         if (sbi->s_want_extra_isize < v)
4543                                 sbi->s_want_extra_isize = v;
4544
4545                         v = le16_to_cpu(es->s_min_extra_isize);
4546                         if (v > max) {
4547                                 ext4_msg(sb, KERN_ERR,
4548                                          "bad s_min_extra_isize: %d", v);
4549                                 goto failed_mount;
4550                         }
4551                         if (sbi->s_want_extra_isize < v)
4552                                 sbi->s_want_extra_isize = v;
4553                 }
4554         }
4555
4556         err = parse_apply_sb_mount_options(sb, ctx);
4557         if (err < 0)
4558                 goto failed_mount;
4559
4560         sbi->s_def_mount_opt = sbi->s_mount_opt;
4561
4562         err = ext4_check_opt_consistency(fc, sb);
4563         if (err < 0)
4564                 goto failed_mount;
4565
4566         ext4_apply_options(fc, sb);
4567
4568 #if IS_ENABLED(CONFIG_UNICODE)
4569         if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
4570                 const struct ext4_sb_encodings *encoding_info;
4571                 struct unicode_map *encoding;
4572                 __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4573
4574                 encoding_info = ext4_sb_read_encoding(es);
4575                 if (!encoding_info) {
4576                         ext4_msg(sb, KERN_ERR,
4577                                  "Encoding requested by superblock is unknown");
4578                         goto failed_mount;
4579                 }
4580
4581                 encoding = utf8_load(encoding_info->version);
4582                 if (IS_ERR(encoding)) {
4583                         ext4_msg(sb, KERN_ERR,
4584                                  "can't mount with superblock charset: %s-%u.%u.%u "
4585                                  "not supported by the kernel. flags: 0x%x.",
4586                                  encoding_info->name,
4587                                  unicode_major(encoding_info->version),
4588                                  unicode_minor(encoding_info->version),
4589                                  unicode_rev(encoding_info->version),
4590                                  encoding_flags);
4591                         goto failed_mount;
4592                 }
4593                 ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4594                          "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4595                          unicode_major(encoding_info->version),
4596                          unicode_minor(encoding_info->version),
4597                          unicode_rev(encoding_info->version),
4598                          encoding_flags);
4599
4600                 sb->s_encoding = encoding;
4601                 sb->s_encoding_flags = encoding_flags;
4602         }
4603 #endif
4604
4605         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4606                 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
4607                 /* can't mount with both data=journal and dioread_nolock. */
4608                 clear_opt(sb, DIOREAD_NOLOCK);
4609                 clear_opt2(sb, JOURNAL_FAST_COMMIT);
4610                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4611                         ext4_msg(sb, KERN_ERR, "can't mount with "
4612                                  "both data=journal and delalloc");
4613                         goto failed_mount;
4614                 }
4615                 if (test_opt(sb, DAX_ALWAYS)) {
4616                         ext4_msg(sb, KERN_ERR, "can't mount with "
4617                                  "both data=journal and dax");
4618                         goto failed_mount;
4619                 }
4620                 if (ext4_has_feature_encrypt(sb)) {
4621                         ext4_msg(sb, KERN_WARNING,
4622                                  "encrypted files will use data=ordered "
4623                                  "instead of data journaling mode");
4624                 }
4625                 if (test_opt(sb, DELALLOC))
4626                         clear_opt(sb, DELALLOC);
4627         } else {
4628                 sb->s_iflags |= SB_I_CGROUPWB;
4629         }
4630
4631         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
4632                 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
4633
4634         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4635             (ext4_has_compat_features(sb) ||
4636              ext4_has_ro_compat_features(sb) ||
4637              ext4_has_incompat_features(sb)))
4638                 ext4_msg(sb, KERN_WARNING,
4639                        "feature flags set on rev 0 fs, "
4640                        "running e2fsck is recommended");
4641
4642         if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4643                 set_opt2(sb, HURD_COMPAT);
4644                 if (ext4_has_feature_64bit(sb)) {
4645                         ext4_msg(sb, KERN_ERR,
4646                                  "The Hurd can't support 64-bit file systems");
4647                         goto failed_mount;
4648                 }
4649
4650                 /*
4651                  * ea_inode feature uses l_i_version field which is not
4652                  * available in HURD_COMPAT mode.
4653                  */
4654                 if (ext4_has_feature_ea_inode(sb)) {
4655                         ext4_msg(sb, KERN_ERR,
4656                                  "ea_inode feature is not supported for Hurd");
4657                         goto failed_mount;
4658                 }
4659         }
4660
4661         if (IS_EXT2_SB(sb)) {
4662                 if (ext2_feature_set_ok(sb))
4663                         ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4664                                  "using the ext4 subsystem");
4665                 else {
4666                         /*
4667                          * If we're probing be silent, if this looks like
4668                          * it's actually an ext[34] filesystem.
4669                          */
4670                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4671                                 goto failed_mount;
4672                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4673                                  "to feature incompatibilities");
4674                         goto failed_mount;
4675                 }
4676         }
4677
4678         if (IS_EXT3_SB(sb)) {
4679                 if (ext3_feature_set_ok(sb))
4680                         ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4681                                  "using the ext4 subsystem");
4682                 else {
4683                         /*
4684                          * If we're probing be silent, if this looks like
4685                          * it's actually an ext4 filesystem.
4686                          */
4687                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4688                                 goto failed_mount;
4689                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4690                                  "to feature incompatibilities");
4691                         goto failed_mount;
4692                 }
4693         }
4694
4695         /*
4696          * Check feature flags regardless of the revision level, since we
4697          * previously didn't change the revision level when setting the flags,
4698          * so there is a chance incompat flags are set on a rev 0 filesystem.
4699          */
4700         if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4701                 goto failed_mount;
4702
4703         if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
4704                 ext4_msg(sb, KERN_ERR,
4705                          "Number of reserved GDT blocks insanely large: %d",
4706                          le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4707                 goto failed_mount;
4708         }
4709
4710         if (sbi->s_daxdev) {
4711                 if (blocksize == PAGE_SIZE)
4712                         set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4713                 else
4714                         ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
4715         }
4716
4717         if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4718                 if (ext4_has_feature_inline_data(sb)) {
4719                         ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4720                                         " that may contain inline data");
4721                         goto failed_mount;
4722                 }
4723                 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4724                         ext4_msg(sb, KERN_ERR,
4725                                 "DAX unsupported by block device.");
4726                         goto failed_mount;
4727                 }
4728         }
4729
4730         if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4731                 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4732                          es->s_encryption_level);
4733                 goto failed_mount;
4734         }
4735
4736         if (sb->s_blocksize != blocksize) {
4737                 /*
4738                  * bh must be released before kill_bdev(), otherwise
4739                  * it won't be freed and its page also. kill_bdev()
4740                  * is called by sb_set_blocksize().
4741                  */
4742                 brelse(bh);
4743                 /* Validate the filesystem blocksize */
4744                 if (!sb_set_blocksize(sb, blocksize)) {
4745                         ext4_msg(sb, KERN_ERR, "bad block size %d",
4746                                         blocksize);
4747                         bh = NULL;
4748                         goto failed_mount;
4749                 }
4750
4751                 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
4752                 offset = do_div(logical_sb_block, blocksize);
4753                 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4754                 if (IS_ERR(bh)) {
4755                         ext4_msg(sb, KERN_ERR,
4756                                "Can't read superblock on 2nd try");
4757                         ret = PTR_ERR(bh);
4758                         bh = NULL;
4759                         goto failed_mount;
4760                 }
4761                 es = (struct ext4_super_block *)(bh->b_data + offset);
4762                 sbi->s_es = es;
4763                 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
4764                         ext4_msg(sb, KERN_ERR,
4765                                "Magic mismatch, very weird!");
4766                         goto failed_mount;
4767                 }
4768         }
4769
4770         has_huge_files = ext4_has_feature_huge_file(sb);
4771         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
4772                                                       has_huge_files);
4773         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
4774
4775         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
4776         if (ext4_has_feature_64bit(sb)) {
4777                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
4778                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
4779                     !is_power_of_2(sbi->s_desc_size)) {
4780                         ext4_msg(sb, KERN_ERR,
4781                                "unsupported descriptor size %lu",
4782                                sbi->s_desc_size);
4783                         goto failed_mount;
4784                 }
4785         } else
4786                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
4787
4788         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
4789         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
4790
4791         sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
4792         if (sbi->s_inodes_per_block == 0)
4793                 goto cantfind_ext4;
4794         if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
4795             sbi->s_inodes_per_group > blocksize * 8) {
4796                 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
4797                          sbi->s_inodes_per_group);
4798                 goto failed_mount;
4799         }
4800         sbi->s_itb_per_group = sbi->s_inodes_per_group /
4801                                         sbi->s_inodes_per_block;
4802         sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
4803         sbi->s_sbh = bh;
4804         sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
4805         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4806         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
4807
4808         for (i = 0; i < 4; i++)
4809                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4810         sbi->s_def_hash_version = es->s_def_hash_version;
4811         if (ext4_has_feature_dir_index(sb)) {
4812                 i = le32_to_cpu(es->s_flags);
4813                 if (i & EXT2_FLAGS_UNSIGNED_HASH)
4814                         sbi->s_hash_unsigned = 3;
4815                 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
4816 #ifdef __CHAR_UNSIGNED__
4817                         if (!sb_rdonly(sb))
4818                                 es->s_flags |=
4819                                         cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4820                         sbi->s_hash_unsigned = 3;
4821 #else
4822                         if (!sb_rdonly(sb))
4823                                 es->s_flags |=
4824                                         cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
4825 #endif
4826                 }
4827         }
4828
4829         /* Handle clustersize */
4830         clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4831         if (ext4_has_feature_bigalloc(sb)) {
4832                 if (clustersize < blocksize) {
4833                         ext4_msg(sb, KERN_ERR,
4834                                  "cluster size (%d) smaller than "
4835                                  "block size (%d)", clustersize, blocksize);
4836                         goto failed_mount;
4837                 }
4838                 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4839                         le32_to_cpu(es->s_log_block_size);
4840                 sbi->s_clusters_per_group =
4841                         le32_to_cpu(es->s_clusters_per_group);
4842                 if (sbi->s_clusters_per_group > blocksize * 8) {
4843                         ext4_msg(sb, KERN_ERR,
4844                                  "#clusters per group too big: %lu",
4845                                  sbi->s_clusters_per_group);
4846                         goto failed_mount;
4847                 }
4848                 if (sbi->s_blocks_per_group !=
4849                     (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4850                         ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4851                                  "clusters per group (%lu) inconsistent",
4852                                  sbi->s_blocks_per_group,
4853                                  sbi->s_clusters_per_group);
4854                         goto failed_mount;
4855                 }
4856         } else {
4857                 if (clustersize != blocksize) {
4858                         ext4_msg(sb, KERN_ERR,
4859                                  "fragment/cluster size (%d) != "
4860                                  "block size (%d)", clustersize, blocksize);
4861                         goto failed_mount;
4862                 }
4863                 if (sbi->s_blocks_per_group > blocksize * 8) {
4864                         ext4_msg(sb, KERN_ERR,
4865                                  "#blocks per group too big: %lu",
4866                                  sbi->s_blocks_per_group);
4867                         goto failed_mount;
4868                 }
4869                 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4870                 sbi->s_cluster_bits = 0;
4871         }
4872         sbi->s_cluster_ratio = clustersize / blocksize;
4873
4874         /* Do we have standard group size of clustersize * 8 blocks ? */
4875         if (sbi->s_blocks_per_group == clustersize << 3)
4876                 set_opt2(sb, STD_GROUP_SIZE);
4877
4878         /*
4879          * Test whether we have more sectors than will fit in sector_t,
4880          * and whether the max offset is addressable by the page cache.
4881          */
4882         err = generic_check_addressable(sb->s_blocksize_bits,
4883                                         ext4_blocks_count(es));
4884         if (err) {
4885                 ext4_msg(sb, KERN_ERR, "filesystem"
4886                          " too large to mount safely on this system");
4887                 goto failed_mount;
4888         }
4889
4890         if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4891                 goto cantfind_ext4;
4892
4893         /* check blocks count against device size */
4894         blocks_count = sb_bdev_nr_blocks(sb);
4895         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4896                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4897                        "exceeds size of device (%llu blocks)",
4898                        ext4_blocks_count(es), blocks_count);
4899                 goto failed_mount;
4900         }
4901
4902         /*
4903          * It makes no sense for the first data block to be beyond the end
4904          * of the filesystem.
4905          */
4906         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4907                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4908                          "block %u is beyond end of filesystem (%llu)",
4909                          le32_to_cpu(es->s_first_data_block),
4910                          ext4_blocks_count(es));
4911                 goto failed_mount;
4912         }
4913         if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4914             (sbi->s_cluster_ratio == 1)) {
4915                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4916                          "block is 0 with a 1k block and cluster size");
4917                 goto failed_mount;
4918         }
4919
4920         blocks_count = (ext4_blocks_count(es) -
4921                         le32_to_cpu(es->s_first_data_block) +
4922                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
4923         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4924         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4925                 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4926                        "(block count %llu, first data block %u, "
4927                        "blocks per group %lu)", blocks_count,
4928                        ext4_blocks_count(es),
4929                        le32_to_cpu(es->s_first_data_block),
4930                        EXT4_BLOCKS_PER_GROUP(sb));
4931                 goto failed_mount;
4932         }
4933         sbi->s_groups_count = blocks_count;
4934         sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4935                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4936         if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4937             le32_to_cpu(es->s_inodes_count)) {
4938                 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4939                          le32_to_cpu(es->s_inodes_count),
4940                          ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4941                 ret = -EINVAL;
4942                 goto failed_mount;
4943         }
4944         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4945                    EXT4_DESC_PER_BLOCK(sb);
4946         if (ext4_has_feature_meta_bg(sb)) {
4947                 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4948                         ext4_msg(sb, KERN_WARNING,
4949                                  "first meta block group too large: %u "
4950                                  "(group descriptor block count %u)",
4951                                  le32_to_cpu(es->s_first_meta_bg), db_count);
4952                         goto failed_mount;
4953                 }
4954         }
4955         rcu_assign_pointer(sbi->s_group_desc,
4956                            kvmalloc_array(db_count,
4957                                           sizeof(struct buffer_head *),
4958                                           GFP_KERNEL));
4959         if (sbi->s_group_desc == NULL) {
4960                 ext4_msg(sb, KERN_ERR, "not enough memory");
4961                 ret = -ENOMEM;
4962                 goto failed_mount;
4963         }
4964
4965         bgl_lock_init(sbi->s_blockgroup_lock);
4966
4967         /* Pre-read the descriptors into the buffer cache */
4968         for (i = 0; i < db_count; i++) {
4969                 block = descriptor_loc(sb, logical_sb_block, i);
4970                 ext4_sb_breadahead_unmovable(sb, block);
4971         }
4972
4973         for (i = 0; i < db_count; i++) {
4974                 struct buffer_head *bh;
4975
4976                 block = descriptor_loc(sb, logical_sb_block, i);
4977                 bh = ext4_sb_bread_unmovable(sb, block);
4978                 if (IS_ERR(bh)) {
4979                         ext4_msg(sb, KERN_ERR,
4980                                "can't read group descriptor %d", i);
4981                         db_count = i;
4982                         ret = PTR_ERR(bh);
4983                         goto failed_mount2;
4984                 }
4985                 rcu_read_lock();
4986                 rcu_dereference(sbi->s_group_desc)[i] = bh;
4987                 rcu_read_unlock();
4988         }
4989         sbi->s_gdb_count = db_count;
4990         if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
4991                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4992                 ret = -EFSCORRUPTED;
4993                 goto failed_mount2;
4994         }
4995
4996         timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
4997         spin_lock_init(&sbi->s_error_lock);
4998         INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
4999
5000         /* Register extent status tree shrinker */
5001         if (ext4_es_register_shrinker(sbi))
5002                 goto failed_mount3;
5003
5004         sbi->s_stripe = ext4_get_stripe_size(sbi);
5005         sbi->s_extent_max_zeroout_kb = 32;
5006
5007         /*
5008          * set up enough so that it can read an inode
5009          */
5010         sb->s_op = &ext4_sops;
5011         sb->s_export_op = &ext4_export_ops;
5012         sb->s_xattr = ext4_xattr_handlers;
5013 #ifdef CONFIG_FS_ENCRYPTION
5014         sb->s_cop = &ext4_cryptops;
5015 #endif
5016 #ifdef CONFIG_FS_VERITY
5017         sb->s_vop = &ext4_verityops;
5018 #endif
5019 #ifdef CONFIG_QUOTA
5020         sb->dq_op = &ext4_quota_operations;
5021         if (ext4_has_feature_quota(sb))
5022                 sb->s_qcop = &dquot_quotactl_sysfile_ops;
5023         else
5024                 sb->s_qcop = &ext4_qctl_operations;
5025         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5026 #endif
5027         memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
5028
5029         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5030         mutex_init(&sbi->s_orphan_lock);
5031
5032         /* Initialize fast commit stuff */
5033         atomic_set(&sbi->s_fc_subtid, 0);
5034         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
5035         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
5036         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
5037         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
5038         sbi->s_fc_bytes = 0;
5039         ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
5040         sbi->s_fc_ineligible_tid = 0;
5041         spin_lock_init(&sbi->s_fc_lock);
5042         memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
5043         sbi->s_fc_replay_state.fc_regions = NULL;
5044         sbi->s_fc_replay_state.fc_regions_size = 0;
5045         sbi->s_fc_replay_state.fc_regions_used = 0;
5046         sbi->s_fc_replay_state.fc_regions_valid = 0;
5047         sbi->s_fc_replay_state.fc_modified_inodes = NULL;
5048         sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
5049         sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
5050
5051         sb->s_root = NULL;
5052
5053         needs_recovery = (es->s_last_orphan != 0 ||
5054                           ext4_has_feature_orphan_present(sb) ||
5055                           ext4_has_feature_journal_needs_recovery(sb));
5056
5057         if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
5058                 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
5059                         goto failed_mount3a;
5060
5061         /*
5062          * The first inode we look at is the journal inode.  Don't try
5063          * root first: it may be modified in the journal!
5064          */
5065         if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5066                 err = ext4_load_journal(sb, es, ctx->journal_devnum);
5067                 if (err)
5068                         goto failed_mount3a;
5069         } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5070                    ext4_has_feature_journal_needs_recovery(sb)) {
5071                 ext4_msg(sb, KERN_ERR, "required journal recovery "
5072                        "suppressed and not mounted read-only");
5073                 goto failed_mount_wq;
5074         } else {
5075                 /* Nojournal mode, all journal mount options are illegal */
5076                 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5077                         ext4_msg(sb, KERN_ERR, "can't mount with "
5078                                  "journal_checksum, fs mounted w/o journal");
5079                         goto failed_mount_wq;
5080                 }
5081                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5082                         ext4_msg(sb, KERN_ERR, "can't mount with "
5083                                  "journal_async_commit, fs mounted w/o journal");
5084                         goto failed_mount_wq;
5085                 }
5086                 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5087                         ext4_msg(sb, KERN_ERR, "can't mount with "
5088                                  "commit=%lu, fs mounted w/o journal",
5089                                  sbi->s_commit_interval / HZ);
5090                         goto failed_mount_wq;
5091                 }
5092                 if (EXT4_MOUNT_DATA_FLAGS &
5093                     (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5094                         ext4_msg(sb, KERN_ERR, "can't mount with "
5095                                  "data=, fs mounted w/o journal");
5096                         goto failed_mount_wq;
5097                 }
5098                 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5099                 clear_opt(sb, JOURNAL_CHECKSUM);
5100                 clear_opt(sb, DATA_FLAGS);
5101                 clear_opt2(sb, JOURNAL_FAST_COMMIT);
5102                 sbi->s_journal = NULL;
5103                 needs_recovery = 0;
5104                 goto no_journal;
5105         }
5106
5107         if (ext4_has_feature_64bit(sb) &&
5108             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
5109                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
5110                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
5111                 goto failed_mount_wq;
5112         }
5113
5114         if (!set_journal_csum_feature_set(sb)) {
5115                 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
5116                          "feature set");
5117                 goto failed_mount_wq;
5118         }
5119
5120         if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
5121                 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
5122                                           JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
5123                 ext4_msg(sb, KERN_ERR,
5124                         "Failed to set fast commit journal feature");
5125                 goto failed_mount_wq;
5126         }
5127
5128         /* We have now updated the journal if required, so we can
5129          * validate the data journaling mode. */
5130         switch (test_opt(sb, DATA_FLAGS)) {
5131         case 0:
5132                 /* No mode set, assume a default based on the journal
5133                  * capabilities: ORDERED_DATA if the journal can
5134                  * cope, else JOURNAL_DATA
5135                  */
5136                 if (jbd2_journal_check_available_features
5137                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
5138                         set_opt(sb, ORDERED_DATA);
5139                         sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
5140                 } else {
5141                         set_opt(sb, JOURNAL_DATA);
5142                         sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
5143                 }
5144                 break;
5145
5146         case EXT4_MOUNT_ORDERED_DATA:
5147         case EXT4_MOUNT_WRITEBACK_DATA:
5148                 if (!jbd2_journal_check_available_features
5149                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
5150                         ext4_msg(sb, KERN_ERR, "Journal does not support "
5151                                "requested data journaling mode");
5152                         goto failed_mount_wq;
5153                 }
5154                 break;
5155         default:
5156                 break;
5157         }
5158
5159         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
5160             test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5161                 ext4_msg(sb, KERN_ERR, "can't mount with "
5162                         "journal_async_commit in data=ordered mode");
5163                 goto failed_mount_wq;
5164         }
5165
5166         set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
5167
5168         sbi->s_journal->j_submit_inode_data_buffers =
5169                 ext4_journal_submit_inode_data_buffers;
5170         sbi->s_journal->j_finish_inode_data_buffers =
5171                 ext4_journal_finish_inode_data_buffers;
5172
5173 no_journal:
5174         if (!test_opt(sb, NO_MBCACHE)) {
5175                 sbi->s_ea_block_cache = ext4_xattr_create_cache();
5176                 if (!sbi->s_ea_block_cache) {
5177                         ext4_msg(sb, KERN_ERR,
5178                                  "Failed to create ea_block_cache");
5179                         goto failed_mount_wq;
5180                 }
5181
5182                 if (ext4_has_feature_ea_inode(sb)) {
5183                         sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5184                         if (!sbi->s_ea_inode_cache) {
5185                                 ext4_msg(sb, KERN_ERR,
5186                                          "Failed to create ea_inode_cache");
5187                                 goto failed_mount_wq;
5188                         }
5189                 }
5190         }
5191
5192         if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
5193                 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
5194                 goto failed_mount_wq;
5195         }
5196
5197         /*
5198          * Get the # of file system overhead blocks from the
5199          * superblock if present.
5200          */
5201         sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5202         /* ignore the precalculated value if it is ridiculous */
5203         if (sbi->s_overhead > ext4_blocks_count(es))
5204                 sbi->s_overhead = 0;
5205         /*
5206          * If the bigalloc feature is not enabled recalculating the
5207          * overhead doesn't take long, so we might as well just redo
5208          * it to make sure we are using the correct value.
5209          */
5210         if (!ext4_has_feature_bigalloc(sb))
5211                 sbi->s_overhead = 0;
5212         if (sbi->s_overhead == 0) {
5213                 err = ext4_calculate_overhead(sb);
5214                 if (err)
5215                         goto failed_mount_wq;
5216         }
5217
5218         /*
5219          * The maximum number of concurrent works can be high and
5220          * concurrency isn't really necessary.  Limit it to 1.
5221          */
5222         EXT4_SB(sb)->rsv_conversion_wq =
5223                 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5224         if (!EXT4_SB(sb)->rsv_conversion_wq) {
5225                 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5226                 ret = -ENOMEM;
5227                 goto failed_mount4;
5228         }
5229
5230         /*
5231          * The jbd2_journal_load will have done any necessary log recovery,
5232          * so we can safely mount the rest of the filesystem now.
5233          */
5234
5235         root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5236         if (IS_ERR(root)) {
5237                 ext4_msg(sb, KERN_ERR, "get root inode failed");
5238                 ret = PTR_ERR(root);
5239                 root = NULL;
5240                 goto failed_mount4;
5241         }
5242         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5243                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5244                 iput(root);
5245                 goto failed_mount4;
5246         }
5247
5248         sb->s_root = d_make_root(root);
5249         if (!sb->s_root) {
5250                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
5251                 ret = -ENOMEM;
5252                 goto failed_mount4;
5253         }
5254
5255         ret = ext4_setup_super(sb, es, sb_rdonly(sb));
5256         if (ret == -EROFS) {
5257                 sb->s_flags |= SB_RDONLY;
5258                 ret = 0;
5259         } else if (ret)
5260                 goto failed_mount4a;
5261
5262         ext4_set_resv_clusters(sb);
5263
5264         if (test_opt(sb, BLOCK_VALIDITY)) {
5265                 err = ext4_setup_system_zone(sb);
5266                 if (err) {
5267                         ext4_msg(sb, KERN_ERR, "failed to initialize system "
5268                                  "zone (%d)", err);
5269                         goto failed_mount4a;
5270                 }
5271         }
5272         ext4_fc_replay_cleanup(sb);
5273
5274         ext4_ext_init(sb);
5275
5276         /*
5277          * Enable optimize_scan if number of groups is > threshold. This can be
5278          * turned off by passing "mb_optimize_scan=0". This can also be
5279          * turned on forcefully by passing "mb_optimize_scan=1".
5280          */
5281         if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5282                 if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5283                         set_opt2(sb, MB_OPTIMIZE_SCAN);
5284                 else
5285                         clear_opt2(sb, MB_OPTIMIZE_SCAN);
5286         }
5287
5288         err = ext4_mb_init(sb);
5289         if (err) {
5290                 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5291                          err);
5292                 goto failed_mount5;
5293         }
5294
5295         /*
5296          * We can only set up the journal commit callback once
5297          * mballoc is initialized
5298          */
5299         if (sbi->s_journal)
5300                 sbi->s_journal->j_commit_callback =
5301                         ext4_journal_commit_callback;
5302
5303         block = ext4_count_free_clusters(sb);
5304         ext4_free_blocks_count_set(sbi->s_es,
5305                                    EXT4_C2B(sbi, block));
5306         err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
5307                                   GFP_KERNEL);
5308         if (!err) {
5309                 unsigned long freei = ext4_count_free_inodes(sb);
5310                 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
5311                 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
5312                                           GFP_KERNEL);
5313         }
5314         if (!err)
5315                 err = percpu_counter_init(&sbi->s_dirs_counter,
5316                                           ext4_count_dirs(sb), GFP_KERNEL);
5317         if (!err)
5318                 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
5319                                           GFP_KERNEL);
5320         if (!err)
5321                 err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
5322                                           GFP_KERNEL);
5323         if (!err)
5324                 err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
5325
5326         if (err) {
5327                 ext4_msg(sb, KERN_ERR, "insufficient memory");
5328                 goto failed_mount6;
5329         }
5330
5331         if (ext4_has_feature_flex_bg(sb))
5332                 if (!ext4_fill_flex_info(sb)) {
5333                         ext4_msg(sb, KERN_ERR,
5334                                "unable to initialize "
5335                                "flex_bg meta info!");
5336                         ret = -ENOMEM;
5337                         goto failed_mount6;
5338                 }
5339
5340         err = ext4_register_li_request(sb, first_not_zeroed);
5341         if (err)
5342                 goto failed_mount6;
5343
5344         err = ext4_register_sysfs(sb);
5345         if (err)
5346                 goto failed_mount7;
5347
5348         err = ext4_init_orphan_info(sb);
5349         if (err)
5350                 goto failed_mount8;
5351 #ifdef CONFIG_QUOTA
5352         /* Enable quota usage during mount. */
5353         if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5354                 err = ext4_enable_quotas(sb);
5355                 if (err)
5356                         goto failed_mount9;
5357         }
5358 #endif  /* CONFIG_QUOTA */
5359
5360         /*
5361          * Save the original bdev mapping's wb_err value which could be
5362          * used to detect the metadata async write error.
5363          */
5364         spin_lock_init(&sbi->s_bdev_wb_lock);
5365         errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
5366                                  &sbi->s_bdev_wb_err);
5367         sb->s_bdev->bd_super = sb;
5368         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5369         ext4_orphan_cleanup(sb, es);
5370         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5371         /*
5372          * Update the checksum after updating free space/inode counters and
5373          * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5374          * checksum in the buffer cache until it is written out and
5375          * e2fsprogs programs trying to open a file system immediately
5376          * after it is mounted can fail.
5377          */
5378         ext4_superblock_csum_set(sb);
5379         if (needs_recovery) {
5380                 ext4_msg(sb, KERN_INFO, "recovery complete");
5381                 err = ext4_mark_recovery_complete(sb, es);
5382                 if (err)
5383                         goto failed_mount9;
5384         }
5385
5386         if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
5387                 ext4_msg(sb, KERN_WARNING,
5388                          "mounting with \"discard\" option, but the device does not support discard");
5389
5390         if (es->s_error_count)
5391                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5392
5393         /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5394         ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5395         ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5396         ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5397         atomic_set(&sbi->s_warning_count, 0);
5398         atomic_set(&sbi->s_msg_count, 0);
5399
5400         return 0;
5401
5402 cantfind_ext4:
5403         if (!silent)
5404                 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5405         goto failed_mount;
5406
5407 failed_mount9:
5408         ext4_release_orphan_info(sb);
5409 failed_mount8:
5410         ext4_unregister_sysfs(sb);
5411         kobject_put(&sbi->s_kobj);
5412 failed_mount7:
5413         ext4_unregister_li_request(sb);
5414 failed_mount6:
5415         ext4_mb_release(sb);
5416         rcu_read_lock();
5417         flex_groups = rcu_dereference(sbi->s_flex_groups);
5418         if (flex_groups) {
5419                 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
5420                         kvfree(flex_groups[i]);
5421                 kvfree(flex_groups);
5422         }
5423         rcu_read_unlock();
5424         percpu_counter_destroy(&sbi->s_freeclusters_counter);
5425         percpu_counter_destroy(&sbi->s_freeinodes_counter);
5426         percpu_counter_destroy(&sbi->s_dirs_counter);
5427         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
5428         percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
5429         percpu_free_rwsem(&sbi->s_writepages_rwsem);
5430 failed_mount5:
5431         ext4_ext_release(sb);
5432         ext4_release_system_zone(sb);
5433 failed_mount4a:
5434         dput(sb->s_root);
5435         sb->s_root = NULL;
5436 failed_mount4:
5437         ext4_msg(sb, KERN_ERR, "mount failed");
5438         if (EXT4_SB(sb)->rsv_conversion_wq)
5439                 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5440 failed_mount_wq:
5441         ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5442         sbi->s_ea_inode_cache = NULL;
5443
5444         ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5445         sbi->s_ea_block_cache = NULL;
5446
5447         if (sbi->s_journal) {
5448                 /* flush s_error_work before journal destroy. */
5449                 flush_work(&sbi->s_error_work);
5450                 jbd2_journal_destroy(sbi->s_journal);
5451                 sbi->s_journal = NULL;
5452         }
5453 failed_mount3a:
5454         ext4_es_unregister_shrinker(sbi);
5455 failed_mount3:
5456         /* flush s_error_work before sbi destroy */
5457         flush_work(&sbi->s_error_work);
5458         del_timer_sync(&sbi->s_err_report);
5459         ext4_stop_mmpd(sbi);
5460 failed_mount2:
5461         rcu_read_lock();
5462         group_desc = rcu_dereference(sbi->s_group_desc);
5463         for (i = 0; i < db_count; i++)
5464                 brelse(group_desc[i]);
5465         kvfree(group_desc);
5466         rcu_read_unlock();
5467 failed_mount:
5468         if (sbi->s_chksum_driver)
5469                 crypto_free_shash(sbi->s_chksum_driver);
5470
5471 #if IS_ENABLED(CONFIG_UNICODE)
5472         utf8_unload(sb->s_encoding);
5473 #endif
5474
5475 #ifdef CONFIG_QUOTA
5476         for (i = 0; i < EXT4_MAXQUOTAS; i++)
5477                 kfree(get_qf_name(sb, sbi, i));
5478 #endif
5479         fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5480         /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
5481         brelse(bh);
5482         ext4_blkdev_remove(sbi);
5483 out_fail:
5484         sb->s_fs_info = NULL;
5485         return err ? err : ret;
5486 }
5487
5488 static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5489 {
5490         struct ext4_fs_context *ctx = fc->fs_private;
5491         struct ext4_sb_info *sbi;
5492         const char *descr;
5493         int ret;
5494
5495         sbi = ext4_alloc_sbi(sb);
5496         if (!sbi)
5497                 return -ENOMEM;
5498
5499         fc->s_fs_info = sbi;
5500
5501         /* Cleanup superblock name */
5502         strreplace(sb->s_id, '/', '!');
5503
5504         sbi->s_sb_block = 1;    /* Default super block location */
5505         if (ctx->spec & EXT4_SPEC_s_sb_block)
5506                 sbi->s_sb_block = ctx->s_sb_block;
5507
5508         ret = __ext4_fill_super(fc, sb);
5509         if (ret < 0)
5510                 goto free_sbi;
5511
5512         if (sbi->s_journal) {
5513                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5514                         descr = " journalled data mode";
5515                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5516                         descr = " ordered data mode";
5517                 else
5518                         descr = " writeback data mode";
5519         } else
5520                 descr = "out journal";
5521
5522         if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5523                 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
5524                          "Quota mode: %s.", descr, ext4_quota_mode(sb));
5525
5526         /* Update the s_overhead_clusters if necessary */
5527         ext4_update_overhead(sb, false);
5528         return 0;
5529
5530 free_sbi:
5531         ext4_free_sbi(sbi);
5532         fc->s_fs_info = NULL;
5533         return ret;
5534 }
5535
5536 static int ext4_get_tree(struct fs_context *fc)
5537 {
5538         return get_tree_bdev(fc, ext4_fill_super);
5539 }
5540
5541 /*
5542  * Setup any per-fs journal parameters now.  We'll do this both on
5543  * initial mount, once the journal has been initialised but before we've
5544  * done any recovery; and again on any subsequent remount.
5545  */
5546 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5547 {
5548         struct ext4_sb_info *sbi = EXT4_SB(sb);
5549
5550         journal->j_commit_interval = sbi->s_commit_interval;
5551         journal->j_min_batch_time = sbi->s_min_batch_time;
5552         journal->j_max_batch_time = sbi->s_max_batch_time;
5553         ext4_fc_init(sb, journal);
5554
5555         write_lock(&journal->j_state_lock);
5556         if (test_opt(sb, BARRIER))
5557                 journal->j_flags |= JBD2_BARRIER;
5558         else
5559                 journal->j_flags &= ~JBD2_BARRIER;
5560         if (test_opt(sb, DATA_ERR_ABORT))
5561                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5562         else
5563                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5564         write_unlock(&journal->j_state_lock);
5565 }
5566
5567 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5568                                              unsigned int journal_inum)
5569 {
5570         struct inode *journal_inode;
5571
5572         /*
5573          * Test for the existence of a valid inode on disk.  Bad things
5574          * happen if we iget() an unused inode, as the subsequent iput()
5575          * will try to delete it.
5576          */
5577         journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5578         if (IS_ERR(journal_inode)) {
5579                 ext4_msg(sb, KERN_ERR, "no journal found");
5580                 return NULL;
5581         }
5582         if (!journal_inode->i_nlink) {
5583                 make_bad_inode(journal_inode);
5584                 iput(journal_inode);
5585                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5586                 return NULL;
5587         }
5588
5589         ext4_debug("Journal inode found at %p: %lld bytes\n",
5590                   journal_inode, journal_inode->i_size);
5591         if (!S_ISREG(journal_inode->i_mode)) {
5592                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
5593                 iput(journal_inode);
5594                 return NULL;
5595         }
5596         return journal_inode;
5597 }
5598
5599 static journal_t *ext4_get_journal(struct super_block *sb,
5600                                    unsigned int journal_inum)
5601 {
5602         struct inode *journal_inode;
5603         journal_t *journal;
5604
5605         if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5606                 return NULL;
5607
5608         journal_inode = ext4_get_journal_inode(sb, journal_inum);
5609         if (!journal_inode)
5610                 return NULL;
5611
5612         journal = jbd2_journal_init_inode(journal_inode);
5613         if (!journal) {
5614                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5615                 iput(journal_inode);
5616                 return NULL;
5617         }
5618         journal->j_private = sb;
5619         ext4_init_journal_params(sb, journal);
5620         return journal;
5621 }
5622
5623 static journal_t *ext4_get_dev_journal(struct super_block *sb,
5624                                        dev_t j_dev)
5625 {
5626         struct buffer_head *bh;
5627         journal_t *journal;
5628         ext4_fsblk_t start;
5629         ext4_fsblk_t len;
5630         int hblock, blocksize;
5631         ext4_fsblk_t sb_block;
5632         unsigned long offset;
5633         struct ext4_super_block *es;
5634         struct block_device *bdev;
5635
5636         if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5637                 return NULL;
5638
5639         bdev = ext4_blkdev_get(j_dev, sb);
5640         if (bdev == NULL)
5641                 return NULL;
5642
5643         blocksize = sb->s_blocksize;
5644         hblock = bdev_logical_block_size(bdev);
5645         if (blocksize < hblock) {
5646                 ext4_msg(sb, KERN_ERR,
5647                         "blocksize too small for journal device");
5648                 goto out_bdev;
5649         }
5650
5651         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5652         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5653         set_blocksize(bdev, blocksize);
5654         if (!(bh = __bread(bdev, sb_block, blocksize))) {
5655                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5656                        "external journal");
5657                 goto out_bdev;
5658         }
5659
5660         es = (struct ext4_super_block *) (bh->b_data + offset);
5661         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5662             !(le32_to_cpu(es->s_feature_incompat) &
5663               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5664                 ext4_msg(sb, KERN_ERR, "external journal has "
5665                                         "bad superblock");
5666                 brelse(bh);
5667                 goto out_bdev;
5668         }
5669
5670         if ((le32_to_cpu(es->s_feature_ro_compat) &
5671              EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5672             es->s_checksum != ext4_superblock_csum(sb, es)) {
5673                 ext4_msg(sb, KERN_ERR, "external journal has "
5674                                        "corrupt superblock");
5675                 brelse(bh);
5676                 goto out_bdev;
5677         }
5678
5679         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5680                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5681                 brelse(bh);
5682                 goto out_bdev;
5683         }
5684
5685         len = ext4_blocks_count(es);
5686         start = sb_block + 1;
5687         brelse(bh);     /* we're done with the superblock */
5688
5689         journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
5690                                         start, len, blocksize);
5691         if (!journal) {
5692                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
5693                 goto out_bdev;
5694         }
5695         journal->j_private = sb;
5696         if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
5697                 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
5698                 goto out_journal;
5699         }
5700         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5701                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
5702                                         "user (unsupported) - %d",
5703                         be32_to_cpu(journal->j_superblock->s_nr_users));
5704                 goto out_journal;
5705         }
5706         EXT4_SB(sb)->s_journal_bdev = bdev;
5707         ext4_init_journal_params(sb, journal);
5708         return journal;
5709
5710 out_journal:
5711         jbd2_journal_destroy(journal);
5712 out_bdev:
5713         ext4_blkdev_put(bdev);
5714         return NULL;
5715 }
5716
5717 static int ext4_load_journal(struct super_block *sb,
5718                              struct ext4_super_block *es,
5719                              unsigned long journal_devnum)
5720 {
5721         journal_t *journal;
5722         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5723         dev_t journal_dev;
5724         int err = 0;
5725         int really_read_only;
5726         int journal_dev_ro;
5727
5728         if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5729                 return -EFSCORRUPTED;
5730
5731         if (journal_devnum &&
5732             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5733                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5734                         "numbers have changed");
5735                 journal_dev = new_decode_dev(journal_devnum);
5736         } else
5737                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5738
5739         if (journal_inum && journal_dev) {
5740                 ext4_msg(sb, KERN_ERR,
5741                          "filesystem has both journal inode and journal device!");
5742                 return -EINVAL;
5743         }
5744
5745         if (journal_inum) {
5746                 journal = ext4_get_journal(sb, journal_inum);
5747                 if (!journal)
5748                         return -EINVAL;
5749         } else {
5750                 journal = ext4_get_dev_journal(sb, journal_dev);
5751                 if (!journal)
5752                         return -EINVAL;
5753         }
5754
5755         journal_dev_ro = bdev_read_only(journal->j_dev);
5756         really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5757
5758         if (journal_dev_ro && !sb_rdonly(sb)) {
5759                 ext4_msg(sb, KERN_ERR,
5760                          "journal device read-only, try mounting with '-o ro'");
5761                 err = -EROFS;
5762                 goto err_out;
5763         }
5764
5765         /*
5766          * Are we loading a blank journal or performing recovery after a
5767          * crash?  For recovery, we need to check in advance whether we
5768          * can get read-write access to the device.
5769          */
5770         if (ext4_has_feature_journal_needs_recovery(sb)) {
5771                 if (sb_rdonly(sb)) {
5772                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
5773                                         "required on readonly filesystem");
5774                         if (really_read_only) {
5775                                 ext4_msg(sb, KERN_ERR, "write access "
5776                                         "unavailable, cannot proceed "
5777                                         "(try mounting with noload)");
5778                                 err = -EROFS;
5779                                 goto err_out;
5780                         }
5781                         ext4_msg(sb, KERN_INFO, "write access will "
5782                                "be enabled during recovery");
5783                 }
5784         }
5785
5786         if (!(journal->j_flags & JBD2_BARRIER))
5787                 ext4_msg(sb, KERN_INFO, "barriers disabled");
5788
5789         if (!ext4_has_feature_journal_needs_recovery(sb))
5790                 err = jbd2_journal_wipe(journal, !really_read_only);
5791         if (!err) {
5792                 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
5793                 if (save)
5794                         memcpy(save, ((char *) es) +
5795                                EXT4_S_ERR_START, EXT4_S_ERR_LEN);
5796                 err = jbd2_journal_load(journal);
5797                 if (save)
5798                         memcpy(((char *) es) + EXT4_S_ERR_START,
5799                                save, EXT4_S_ERR_LEN);
5800                 kfree(save);
5801         }
5802
5803         if (err) {
5804                 ext4_msg(sb, KERN_ERR, "error loading journal");
5805                 goto err_out;
5806         }
5807
5808         EXT4_SB(sb)->s_journal = journal;
5809         err = ext4_clear_journal_err(sb, es);
5810         if (err) {
5811                 EXT4_SB(sb)->s_journal = NULL;
5812                 jbd2_journal_destroy(journal);
5813                 return err;
5814         }
5815
5816         if (!really_read_only && journal_devnum &&
5817             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5818                 es->s_journal_dev = cpu_to_le32(journal_devnum);
5819
5820                 /* Make sure we flush the recovery flag to disk. */
5821                 ext4_commit_super(sb);
5822         }
5823
5824         return 0;
5825
5826 err_out:
5827         jbd2_journal_destroy(journal);
5828         return err;
5829 }
5830
5831 /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
5832 static void ext4_update_super(struct super_block *sb)
5833 {
5834         struct ext4_sb_info *sbi = EXT4_SB(sb);
5835         struct ext4_super_block *es = sbi->s_es;
5836         struct buffer_head *sbh = sbi->s_sbh;
5837
5838         lock_buffer(sbh);
5839         /*
5840          * If the file system is mounted read-only, don't update the
5841          * superblock write time.  This avoids updating the superblock
5842          * write time when we are mounting the root file system
5843          * read/only but we need to replay the journal; at that point,
5844          * for people who are east of GMT and who make their clock
5845          * tick in localtime for Windows bug-for-bug compatibility,
5846          * the clock is set in the future, and this will cause e2fsck
5847          * to complain and force a full file system check.
5848          */
5849         if (!(sb->s_flags & SB_RDONLY))
5850                 ext4_update_tstamp(es, s_wtime);
5851         es->s_kbytes_written =
5852                 cpu_to_le64(sbi->s_kbytes_written +
5853                     ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
5854                       sbi->s_sectors_written_start) >> 1));
5855         if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
5856                 ext4_free_blocks_count_set(es,
5857                         EXT4_C2B(sbi, percpu_counter_sum_positive(
5858                                 &sbi->s_freeclusters_counter)));
5859         if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
5860                 es->s_free_inodes_count =
5861                         cpu_to_le32(percpu_counter_sum_positive(
5862                                 &sbi->s_freeinodes_counter));
5863         /* Copy error information to the on-disk superblock */
5864         spin_lock(&sbi->s_error_lock);
5865         if (sbi->s_add_error_count > 0) {
5866                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
5867                 if (!es->s_first_error_time && !es->s_first_error_time_hi) {
5868                         __ext4_update_tstamp(&es->s_first_error_time,
5869                                              &es->s_first_error_time_hi,
5870                                              sbi->s_first_error_time);
5871                         strncpy(es->s_first_error_func, sbi->s_first_error_func,
5872                                 sizeof(es->s_first_error_func));
5873                         es->s_first_error_line =
5874                                 cpu_to_le32(sbi->s_first_error_line);
5875                         es->s_first_error_ino =
5876                                 cpu_to_le32(sbi->s_first_error_ino);
5877                         es->s_first_error_block =
5878                                 cpu_to_le64(sbi->s_first_error_block);
5879                         es->s_first_error_errcode =
5880                                 ext4_errno_to_code(sbi->s_first_error_code);
5881                 }
5882                 __ext4_update_tstamp(&es->s_last_error_time,
5883                                      &es->s_last_error_time_hi,
5884                                      sbi->s_last_error_time);
5885                 strncpy(es->s_last_error_func, sbi->s_last_error_func,
5886                         sizeof(es->s_last_error_func));
5887                 es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
5888                 es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
5889                 es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
5890                 es->s_last_error_errcode =
5891                                 ext4_errno_to_code(sbi->s_last_error_code);
5892                 /*
5893                  * Start the daily error reporting function if it hasn't been
5894                  * started already
5895                  */
5896                 if (!es->s_error_count)
5897                         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
5898                 le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
5899                 sbi->s_add_error_count = 0;
5900         }
5901         spin_unlock(&sbi->s_error_lock);
5902
5903         ext4_superblock_csum_set(sb);
5904         unlock_buffer(sbh);
5905 }
5906
5907 static int ext4_commit_super(struct super_block *sb)
5908 {
5909         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
5910
5911         if (!sbh)
5912                 return -EINVAL;
5913         if (block_device_ejected(sb))
5914                 return -ENODEV;
5915
5916         ext4_update_super(sb);
5917
5918         lock_buffer(sbh);
5919         /* Buffer got discarded which means block device got invalidated */
5920         if (!buffer_mapped(sbh)) {
5921                 unlock_buffer(sbh);
5922                 return -EIO;
5923         }
5924
5925         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
5926                 /*
5927                  * Oh, dear.  A previous attempt to write the
5928                  * superblock failed.  This could happen because the
5929                  * USB device was yanked out.  Or it could happen to
5930                  * be a transient write error and maybe the block will
5931                  * be remapped.  Nothing we can do but to retry the
5932                  * write and hope for the best.
5933                  */
5934                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
5935                        "superblock detected");
5936                 clear_buffer_write_io_error(sbh);
5937                 set_buffer_uptodate(sbh);
5938         }
5939         get_bh(sbh);
5940         /* Clear potential dirty bit if it was journalled update */
5941         clear_buffer_dirty(sbh);
5942         sbh->b_end_io = end_buffer_write_sync;
5943         submit_bh(REQ_OP_WRITE | REQ_SYNC |
5944                   (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
5945         wait_on_buffer(sbh);
5946         if (buffer_write_io_error(sbh)) {
5947                 ext4_msg(sb, KERN_ERR, "I/O error while writing "
5948                        "superblock");
5949                 clear_buffer_write_io_error(sbh);
5950                 set_buffer_uptodate(sbh);
5951                 return -EIO;
5952         }
5953         return 0;
5954 }
5955
5956 /*
5957  * Have we just finished recovery?  If so, and if we are mounting (or
5958  * remounting) the filesystem readonly, then we will end up with a
5959  * consistent fs on disk.  Record that fact.
5960  */
5961 static int ext4_mark_recovery_complete(struct super_block *sb,
5962                                        struct ext4_super_block *es)
5963 {
5964         int err;
5965         journal_t *journal = EXT4_SB(sb)->s_journal;
5966
5967         if (!ext4_has_feature_journal(sb)) {
5968                 if (journal != NULL) {
5969                         ext4_error(sb, "Journal got removed while the fs was "
5970                                    "mounted!");
5971                         return -EFSCORRUPTED;
5972                 }
5973                 return 0;
5974         }
5975         jbd2_journal_lock_updates(journal);
5976         err = jbd2_journal_flush(journal, 0);
5977         if (err < 0)
5978                 goto out;
5979
5980         if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
5981             ext4_has_feature_orphan_present(sb))) {
5982                 if (!ext4_orphan_file_empty(sb)) {
5983                         ext4_error(sb, "Orphan file not empty on read-only fs.");
5984                         err = -EFSCORRUPTED;
5985                         goto out;
5986                 }
5987                 ext4_clear_feature_journal_needs_recovery(sb);
5988                 ext4_clear_feature_orphan_present(sb);
5989                 ext4_commit_super(sb);
5990         }
5991 out:
5992         jbd2_journal_unlock_updates(journal);
5993         return err;
5994 }
5995
5996 /*
5997  * If we are mounting (or read-write remounting) a filesystem whose journal
5998  * has recorded an error from a previous lifetime, move that error to the
5999  * main filesystem now.
6000  */
6001 static int ext4_clear_journal_err(struct super_block *sb,
6002                                    struct ext4_super_block *es)
6003 {
6004         journal_t *journal;
6005         int j_errno;
6006         const char *errstr;
6007
6008         if (!ext4_has_feature_journal(sb)) {
6009                 ext4_error(sb, "Journal got removed while the fs was mounted!");
6010                 return -EFSCORRUPTED;
6011         }
6012
6013         journal = EXT4_SB(sb)->s_journal;
6014
6015         /*
6016          * Now check for any error status which may have been recorded in the
6017          * journal by a prior ext4_error() or ext4_abort()
6018          */
6019
6020         j_errno = jbd2_journal_errno(journal);
6021         if (j_errno) {
6022                 char nbuf[16];
6023
6024                 errstr = ext4_decode_error(sb, j_errno, nbuf);
6025                 ext4_warning(sb, "Filesystem error recorded "
6026                              "from previous mount: %s", errstr);
6027                 ext4_warning(sb, "Marking fs in need of filesystem check.");
6028
6029                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6030                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6031                 ext4_commit_super(sb);
6032
6033                 jbd2_journal_clear_err(journal);
6034                 jbd2_journal_update_sb_errno(journal);
6035         }
6036         return 0;
6037 }
6038
6039 /*
6040  * Force the running and committing transactions to commit,
6041  * and wait on the commit.
6042  */
6043 int ext4_force_commit(struct super_block *sb)
6044 {
6045         journal_t *journal;
6046
6047         if (sb_rdonly(sb))
6048                 return 0;
6049
6050         journal = EXT4_SB(sb)->s_journal;
6051         return ext4_journal_force_commit(journal);
6052 }
6053
6054 static int ext4_sync_fs(struct super_block *sb, int wait)
6055 {
6056         int ret = 0;
6057         tid_t target;
6058         bool needs_barrier = false;
6059         struct ext4_sb_info *sbi = EXT4_SB(sb);
6060
6061         if (unlikely(ext4_forced_shutdown(sbi)))
6062                 return 0;
6063
6064         trace_ext4_sync_fs(sb, wait);
6065         flush_workqueue(sbi->rsv_conversion_wq);
6066         /*
6067          * Writeback quota in non-journalled quota case - journalled quota has
6068          * no dirty dquots
6069          */
6070         dquot_writeback_dquots(sb, -1);
6071         /*
6072          * Data writeback is possible w/o journal transaction, so barrier must
6073          * being sent at the end of the function. But we can skip it if
6074          * transaction_commit will do it for us.
6075          */
6076         if (sbi->s_journal) {
6077                 target = jbd2_get_latest_transaction(sbi->s_journal);
6078                 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6079                     !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6080                         needs_barrier = true;
6081
6082                 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6083                         if (wait)
6084                                 ret = jbd2_log_wait_commit(sbi->s_journal,
6085                                                            target);
6086                 }
6087         } else if (wait && test_opt(sb, BARRIER))
6088                 needs_barrier = true;
6089         if (needs_barrier) {
6090                 int err;
6091                 err = blkdev_issue_flush(sb->s_bdev);
6092                 if (!ret)
6093                         ret = err;
6094         }
6095
6096         return ret;
6097 }
6098
6099 /*
6100  * LVM calls this function before a (read-only) snapshot is created.  This
6101  * gives us a chance to flush the journal completely and mark the fs clean.
6102  *
6103  * Note that only this function cannot bring a filesystem to be in a clean
6104  * state independently. It relies on upper layer to stop all data & metadata
6105  * modifications.
6106  */
6107 static int ext4_freeze(struct super_block *sb)
6108 {
6109         int error = 0;
6110         journal_t *journal;
6111
6112         if (sb_rdonly(sb))
6113                 return 0;
6114
6115         journal = EXT4_SB(sb)->s_journal;
6116
6117         if (journal) {
6118                 /* Now we set up the journal barrier. */
6119                 jbd2_journal_lock_updates(journal);
6120
6121                 /*
6122                  * Don't clear the needs_recovery flag if we failed to
6123                  * flush the journal.
6124                  */
6125                 error = jbd2_journal_flush(journal, 0);
6126                 if (error < 0)
6127                         goto out;
6128
6129                 /* Journal blocked and flushed, clear needs_recovery flag. */
6130                 ext4_clear_feature_journal_needs_recovery(sb);
6131                 if (ext4_orphan_file_empty(sb))
6132                         ext4_clear_feature_orphan_present(sb);
6133         }
6134
6135         error = ext4_commit_super(sb);
6136 out:
6137         if (journal)
6138                 /* we rely on upper layer to stop further updates */
6139                 jbd2_journal_unlock_updates(journal);
6140         return error;
6141 }
6142
6143 /*
6144  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
6145  * flag here, even though the filesystem is not technically dirty yet.
6146  */
6147 static int ext4_unfreeze(struct super_block *sb)
6148 {
6149         if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
6150                 return 0;
6151
6152         if (EXT4_SB(sb)->s_journal) {
6153                 /* Reset the needs_recovery flag before the fs is unlocked. */
6154                 ext4_set_feature_journal_needs_recovery(sb);
6155                 if (ext4_has_feature_orphan_file(sb))
6156                         ext4_set_feature_orphan_present(sb);
6157         }
6158
6159         ext4_commit_super(sb);
6160         return 0;
6161 }
6162
6163 /*
6164  * Structure to save mount options for ext4_remount's benefit
6165  */
6166 struct ext4_mount_options {
6167         unsigned long s_mount_opt;
6168         unsigned long s_mount_opt2;
6169         kuid_t s_resuid;
6170         kgid_t s_resgid;
6171         unsigned long s_commit_interval;
6172         u32 s_min_batch_time, s_max_batch_time;
6173 #ifdef CONFIG_QUOTA
6174         int s_jquota_fmt;
6175         char *s_qf_names[EXT4_MAXQUOTAS];
6176 #endif
6177 };
6178
6179 static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6180 {
6181         struct ext4_fs_context *ctx = fc->fs_private;
6182         struct ext4_super_block *es;
6183         struct ext4_sb_info *sbi = EXT4_SB(sb);
6184         unsigned long old_sb_flags;
6185         struct ext4_mount_options old_opts;
6186         ext4_group_t g;
6187         int err = 0;
6188 #ifdef CONFIG_QUOTA
6189         int enable_quota = 0;
6190         int i, j;
6191         char *to_free[EXT4_MAXQUOTAS];
6192 #endif
6193
6194
6195         /* Store the original options */
6196         old_sb_flags = sb->s_flags;
6197         old_opts.s_mount_opt = sbi->s_mount_opt;
6198         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6199         old_opts.s_resuid = sbi->s_resuid;
6200         old_opts.s_resgid = sbi->s_resgid;
6201         old_opts.s_commit_interval = sbi->s_commit_interval;
6202         old_opts.s_min_batch_time = sbi->s_min_batch_time;
6203         old_opts.s_max_batch_time = sbi->s_max_batch_time;
6204 #ifdef CONFIG_QUOTA
6205         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6206         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6207                 if (sbi->s_qf_names[i]) {
6208                         char *qf_name = get_qf_name(sb, sbi, i);
6209
6210                         old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6211                         if (!old_opts.s_qf_names[i]) {
6212                                 for (j = 0; j < i; j++)
6213                                         kfree(old_opts.s_qf_names[j]);
6214                                 return -ENOMEM;
6215                         }
6216                 } else
6217                         old_opts.s_qf_names[i] = NULL;
6218 #endif
6219         if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6220                 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6221                         ctx->journal_ioprio =
6222                                 sbi->s_journal->j_task->io_context->ioprio;
6223                 else
6224                         ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6225
6226         }
6227
6228         ext4_apply_options(fc, sb);
6229
6230         if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6231             test_opt(sb, JOURNAL_CHECKSUM)) {
6232                 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6233                          "during remount not supported; ignoring");
6234                 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6235         }
6236
6237         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6238                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6239                         ext4_msg(sb, KERN_ERR, "can't mount with "
6240                                  "both data=journal and delalloc");
6241                         err = -EINVAL;
6242                         goto restore_opts;
6243                 }
6244                 if (test_opt(sb, DIOREAD_NOLOCK)) {
6245                         ext4_msg(sb, KERN_ERR, "can't mount with "
6246                                  "both data=journal and dioread_nolock");
6247                         err = -EINVAL;
6248                         goto restore_opts;
6249                 }
6250         } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6251                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6252                         ext4_msg(sb, KERN_ERR, "can't mount with "
6253                                 "journal_async_commit in data=ordered mode");
6254                         err = -EINVAL;
6255                         goto restore_opts;
6256                 }
6257         }
6258
6259         if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6260                 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6261                 err = -EINVAL;
6262                 goto restore_opts;
6263         }
6264
6265         if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
6266                 ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6267
6268         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6269                 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6270
6271         es = sbi->s_es;
6272
6273         if (sbi->s_journal) {
6274                 ext4_init_journal_params(sb, sbi->s_journal);
6275                 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6276         }
6277
6278         /* Flush outstanding errors before changing fs state */
6279         flush_work(&sbi->s_error_work);
6280
6281         if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6282                 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
6283                         err = -EROFS;
6284                         goto restore_opts;
6285                 }
6286
6287                 if (fc->sb_flags & SB_RDONLY) {
6288                         err = sync_filesystem(sb);
6289                         if (err < 0)
6290                                 goto restore_opts;
6291                         err = dquot_suspend(sb, -1);
6292                         if (err < 0)
6293                                 goto restore_opts;
6294
6295                         /*
6296                          * First of all, the unconditional stuff we have to do
6297                          * to disable replay of the journal when we next remount
6298                          */
6299                         sb->s_flags |= SB_RDONLY;
6300
6301                         /*
6302                          * OK, test if we are remounting a valid rw partition
6303                          * readonly, and if so set the rdonly flag and then
6304                          * mark the partition as valid again.
6305                          */
6306                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6307                             (sbi->s_mount_state & EXT4_VALID_FS))
6308                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
6309
6310                         if (sbi->s_journal) {
6311                                 /*
6312                                  * We let remount-ro finish even if marking fs
6313                                  * as clean failed...
6314                                  */
6315                                 ext4_mark_recovery_complete(sb, es);
6316                         }
6317                 } else {
6318                         /* Make sure we can mount this feature set readwrite */
6319                         if (ext4_has_feature_readonly(sb) ||
6320                             !ext4_feature_set_ok(sb, 0)) {
6321                                 err = -EROFS;
6322                                 goto restore_opts;
6323                         }
6324                         /*
6325                          * Make sure the group descriptor checksums
6326                          * are sane.  If they aren't, refuse to remount r/w.
6327                          */
6328                         for (g = 0; g < sbi->s_groups_count; g++) {
6329                                 struct ext4_group_desc *gdp =
6330                                         ext4_get_group_desc(sb, g, NULL);
6331
6332                                 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6333                                         ext4_msg(sb, KERN_ERR,
6334                "ext4_remount: Checksum for group %u failed (%u!=%u)",
6335                 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6336                                                le16_to_cpu(gdp->bg_checksum));
6337                                         err = -EFSBADCRC;
6338                                         goto restore_opts;
6339                                 }
6340                         }
6341
6342                         /*
6343                          * If we have an unprocessed orphan list hanging
6344                          * around from a previously readonly bdev mount,
6345                          * require a full umount/remount for now.
6346                          */
6347                         if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6348                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
6349                                        "remount RDWR because of unprocessed "
6350                                        "orphan inode list.  Please "
6351                                        "umount/remount instead");
6352                                 err = -EINVAL;
6353                                 goto restore_opts;
6354                         }
6355
6356                         /*
6357                          * Mounting a RDONLY partition read-write, so reread
6358                          * and store the current valid flag.  (It may have
6359                          * been changed by e2fsck since we originally mounted
6360                          * the partition.)
6361                          */
6362                         if (sbi->s_journal) {
6363                                 err = ext4_clear_journal_err(sb, es);
6364                                 if (err)
6365                                         goto restore_opts;
6366                         }
6367                         sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6368                                               ~EXT4_FC_REPLAY);
6369
6370                         err = ext4_setup_super(sb, es, 0);
6371                         if (err)
6372                                 goto restore_opts;
6373
6374                         sb->s_flags &= ~SB_RDONLY;
6375                         if (ext4_has_feature_mmp(sb))
6376                                 if (ext4_multi_mount_protect(sb,
6377                                                 le64_to_cpu(es->s_mmp_block))) {
6378                                         err = -EROFS;
6379                                         goto restore_opts;
6380                                 }
6381 #ifdef CONFIG_QUOTA
6382                         enable_quota = 1;
6383 #endif
6384                 }
6385         }
6386
6387         /*
6388          * Reinitialize lazy itable initialization thread based on
6389          * current settings
6390          */
6391         if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6392                 ext4_unregister_li_request(sb);
6393         else {
6394                 ext4_group_t first_not_zeroed;
6395                 first_not_zeroed = ext4_has_uninit_itable(sb);
6396                 ext4_register_li_request(sb, first_not_zeroed);
6397         }
6398
6399         /*
6400          * Handle creation of system zone data early because it can fail.
6401          * Releasing of existing data is done when we are sure remount will
6402          * succeed.
6403          */
6404         if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6405                 err = ext4_setup_system_zone(sb);
6406                 if (err)
6407                         goto restore_opts;
6408         }
6409
6410         if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6411                 err = ext4_commit_super(sb);
6412                 if (err)
6413                         goto restore_opts;
6414         }
6415
6416 #ifdef CONFIG_QUOTA
6417         /* Release old quota file names */
6418         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6419                 kfree(old_opts.s_qf_names[i]);
6420         if (enable_quota) {
6421                 if (sb_any_quota_suspended(sb))
6422                         dquot_resume(sb, -1);
6423                 else if (ext4_has_feature_quota(sb)) {
6424                         err = ext4_enable_quotas(sb);
6425                         if (err)
6426                                 goto restore_opts;
6427                 }
6428         }
6429 #endif
6430         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6431                 ext4_release_system_zone(sb);
6432
6433         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6434                 ext4_stop_mmpd(sbi);
6435
6436         return 0;
6437
6438 restore_opts:
6439         sb->s_flags = old_sb_flags;
6440         sbi->s_mount_opt = old_opts.s_mount_opt;
6441         sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6442         sbi->s_resuid = old_opts.s_resuid;
6443         sbi->s_resgid = old_opts.s_resgid;
6444         sbi->s_commit_interval = old_opts.s_commit_interval;
6445         sbi->s_min_batch_time = old_opts.s_min_batch_time;
6446         sbi->s_max_batch_time = old_opts.s_max_batch_time;
6447         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6448                 ext4_release_system_zone(sb);
6449 #ifdef CONFIG_QUOTA
6450         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
6451         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6452                 to_free[i] = get_qf_name(sb, sbi, i);
6453                 rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6454         }
6455         synchronize_rcu();
6456         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6457                 kfree(to_free[i]);
6458 #endif
6459         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6460                 ext4_stop_mmpd(sbi);
6461         return err;
6462 }
6463
6464 static int ext4_reconfigure(struct fs_context *fc)
6465 {
6466         struct super_block *sb = fc->root->d_sb;
6467         int ret;
6468
6469         fc->s_fs_info = EXT4_SB(sb);
6470
6471         ret = ext4_check_opt_consistency(fc, sb);
6472         if (ret < 0)
6473                 return ret;
6474
6475         ret = __ext4_remount(fc, sb);
6476         if (ret < 0)
6477                 return ret;
6478
6479         ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.",
6480                  ext4_quota_mode(sb));
6481
6482         return 0;
6483 }
6484
#ifdef CONFIG_QUOTA
/*
 * Clamp the statfs numbers in @buf to the quota limits of project @projid.
 *
 * For blocks and for inodes the effective limit is the smaller non-zero
 * value of the soft and hard limit.  A limit only takes effect when it is
 * non-zero and below the total already stored in @buf.
 *
 * Returns 0, or the error from dqget().
 */
static int ext4_statfs_project(struct super_block *sb,
			       kprojid_t projid, struct kstatfs *buf)
{
	struct kqid qid;
	struct dquot *dquot;
	u64 limit;
	u64 used;

	qid = make_kqid_projid(projid);
	dquot = dqget(sb, qid);
	if (IS_ERR(dquot))
		return PTR_ERR(dquot);

	spin_lock(&dquot->dq_dqb_lock);

	/* Block limit, shifted down by the fs block size bits */
	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
			     dquot->dq_dqb.dqb_bhardlimit);
	limit >>= sb->s_blocksize_bits;

	if (limit && buf->f_blocks > limit) {
		used = (dquot->dq_dqb.dqb_curspace +
			dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
		buf->f_blocks = limit;
		/* Never report less than zero free blocks */
		if (buf->f_blocks > used)
			buf->f_bfree = buf->f_blocks - used;
		else
			buf->f_bfree = 0;
		buf->f_bavail = buf->f_bfree;
	}

	/* Inode limit */
	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
			     dquot->dq_dqb.dqb_ihardlimit);
	if (limit && buf->f_files > limit) {
		buf->f_files = limit;
		if (buf->f_files > dquot->dq_dqb.dqb_curinodes)
			buf->f_ffree = buf->f_files -
				       dquot->dq_dqb.dqb_curinodes;
		else
			buf->f_ffree = 0;
	}

	spin_unlock(&dquot->dq_dqb_lock);
	dqput(dquot);
	return 0;
}
#endif
6527
6528 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6529 {
6530         struct super_block *sb = dentry->d_sb;
6531         struct ext4_sb_info *sbi = EXT4_SB(sb);
6532         struct ext4_super_block *es = sbi->s_es;
6533         ext4_fsblk_t overhead = 0, resv_blocks;
6534         s64 bfree;
6535         resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6536
6537         if (!test_opt(sb, MINIX_DF))
6538                 overhead = sbi->s_overhead;
6539
6540         buf->f_type = EXT4_SUPER_MAGIC;
6541         buf->f_bsize = sb->s_blocksize;
6542         buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6543         bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6544                 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6545         /* prevent underflow in case that few free space is available */
6546         buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6547         buf->f_bavail = buf->f_bfree -
6548                         (ext4_r_blocks_count(es) + resv_blocks);
6549         if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6550                 buf->f_bavail = 0;
6551         buf->f_files = le32_to_cpu(es->s_inodes_count);
6552         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6553         buf->f_namelen = EXT4_NAME_LEN;
6554         buf->f_fsid = uuid_to_fsid(es->s_uuid);
6555
6556 #ifdef CONFIG_QUOTA
6557         if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6558             sb_has_quota_limits_enabled(sb, PRJQUOTA))
6559                 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6560 #endif
6561         return 0;
6562 }
6563
6564
6565 #ifdef CONFIG_QUOTA
6566
6567 /*
6568  * Helper functions so that transaction is started before we acquire dqio_sem
6569  * to keep correct lock ordering of transaction > dqio_sem
6570  */
6571 static inline struct inode *dquot_to_inode(struct dquot *dquot)
6572 {
6573         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6574 }
6575
6576 static int ext4_write_dquot(struct dquot *dquot)
6577 {
6578         int ret, err;
6579         handle_t *handle;
6580         struct inode *inode;
6581
6582         inode = dquot_to_inode(dquot);
6583         handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6584                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6585         if (IS_ERR(handle))
6586                 return PTR_ERR(handle);
6587         ret = dquot_commit(dquot);
6588         err = ext4_journal_stop(handle);
6589         if (!ret)
6590                 ret = err;
6591         return ret;
6592 }
6593
6594 static int ext4_acquire_dquot(struct dquot *dquot)
6595 {
6596         int ret, err;
6597         handle_t *handle;
6598
6599         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6600                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6601         if (IS_ERR(handle))
6602                 return PTR_ERR(handle);
6603         ret = dquot_acquire(dquot);
6604         err = ext4_journal_stop(handle);
6605         if (!ret)
6606                 ret = err;
6607         return ret;
6608 }
6609
6610 static int ext4_release_dquot(struct dquot *dquot)
6611 {
6612         int ret, err;
6613         handle_t *handle;
6614
6615         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6616                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6617         if (IS_ERR(handle)) {
6618                 /* Release dquot anyway to avoid endless cycle in dqput() */
6619                 dquot_release(dquot);
6620                 return PTR_ERR(handle);
6621         }
6622         ret = dquot_release(dquot);
6623         err = ext4_journal_stop(handle);
6624         if (!ret)
6625                 ret = err;
6626         return ret;
6627 }
6628
6629 static int ext4_mark_dquot_dirty(struct dquot *dquot)
6630 {
6631         struct super_block *sb = dquot->dq_sb;
6632
6633         if (ext4_is_quota_journalled(sb)) {
6634                 dquot_mark_dquot_dirty(dquot);
6635                 return ext4_write_dquot(dquot);
6636         } else {
6637                 return dquot_mark_dquot_dirty(dquot);
6638         }
6639 }
6640
6641 static int ext4_write_info(struct super_block *sb, int type)
6642 {
6643         int ret, err;
6644         handle_t *handle;
6645
6646         /* Data block + inode block */
6647         handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
6648         if (IS_ERR(handle))
6649                 return PTR_ERR(handle);
6650         ret = dquot_commit_info(sb, type);
6651         err = ext4_journal_stop(handle);
6652         if (!ret)
6653                 ret = err;
6654         return ret;
6655 }
6656
/*
 * Set the lockdep subclass of @inode's i_data_sem to @subclass.
 *
 * The callers in this file pass I_DATA_SEM_QUOTA when an inode is about to
 * be used as a quota file and I_DATA_SEM_NORMAL when that use ends or
 * fails.
 */
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
        struct ext4_inode_info *ei = EXT4_I(inode);

        /* The first argument of lockdep_set_subclass has to be
         * *exactly* the same as the argument to init_rwsem() --- in
         * this case, in init_once() --- or lockdep gets unhappy
         * because the name of the lock is set using the
         * stringification of the argument to init_rwsem().
         *
         * For that reason the local must keep the name "ei"; do not
         * rename it or inline EXT4_I(inode) into the call below.
         */
        (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
        lockdep_set_subclass(&ei->i_data_sem, subclass);
}
6670
6671 /*
6672  * Standard function to be called on quota_on
6673  */
6674 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
6675                          const struct path *path)
6676 {
6677         int err;
6678
6679         if (!test_opt(sb, QUOTA))
6680                 return -EINVAL;
6681
6682         /* Quotafile not on the same filesystem? */
6683         if (path->dentry->d_sb != sb)
6684                 return -EXDEV;
6685
6686         /* Quota already enabled for this file? */
6687         if (IS_NOQUOTA(d_inode(path->dentry)))
6688                 return -EBUSY;
6689
6690         /* Journaling quota? */
6691         if (EXT4_SB(sb)->s_qf_names[type]) {
6692                 /* Quotafile not in fs root? */
6693                 if (path->dentry->d_parent != sb->s_root)
6694                         ext4_msg(sb, KERN_WARNING,
6695                                 "Quota file not on filesystem root. "
6696                                 "Journaled quota will not work");
6697                 sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6698         } else {
6699                 /*
6700                  * Clear the flag just in case mount options changed since
6701                  * last time.
6702                  */
6703                 sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
6704         }
6705
6706         /*
6707          * When we journal data on quota file, we have to flush journal to see
6708          * all updates to the file when we bypass pagecache...
6709          */
6710         if (EXT4_SB(sb)->s_journal &&
6711             ext4_should_journal_data(d_inode(path->dentry))) {
6712                 /*
6713                  * We don't need to lock updates but journal_flush() could
6714                  * otherwise be livelocked...
6715                  */
6716                 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
6717                 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
6718                 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
6719                 if (err)
6720                         return err;
6721         }
6722
6723         lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6724         err = dquot_quota_on(sb, type, format_id, path);
6725         if (!err) {
6726                 struct inode *inode = d_inode(path->dentry);
6727                 handle_t *handle;
6728
6729                 /*
6730                  * Set inode flags to prevent userspace from messing with quota
6731                  * files. If this fails, we return success anyway since quotas
6732                  * are already enabled and this is not a hard failure.
6733                  */
6734                 inode_lock(inode);
6735                 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6736                 if (IS_ERR(handle))
6737                         goto unlock_inode;
6738                 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
6739                 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
6740                                 S_NOATIME | S_IMMUTABLE);
6741                 err = ext4_mark_inode_dirty(handle, inode);
6742                 ext4_journal_stop(handle);
6743         unlock_inode:
6744                 inode_unlock(inode);
6745                 if (err)
6746                         dquot_quota_off(sb, type);
6747         }
6748         if (err)
6749                 lockdep_set_quota_inode(path->dentry->d_inode,
6750                                              I_DATA_SEM_NORMAL);
6751         return err;
6752 }
6753
6754 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
6755                              unsigned int flags)
6756 {
6757         int err;
6758         struct inode *qf_inode;
6759         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
6760                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
6761                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6762                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
6763         };
6764
6765         BUG_ON(!ext4_has_feature_quota(sb));
6766
6767         if (!qf_inums[type])
6768                 return -EPERM;
6769
6770         qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
6771         if (IS_ERR(qf_inode)) {
6772                 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
6773                 return PTR_ERR(qf_inode);
6774         }
6775
6776         /* Don't account quota for quota files to avoid recursion */
6777         qf_inode->i_flags |= S_NOQUOTA;
6778         lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
6779         err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
6780         if (err)
6781                 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
6782         iput(qf_inode);
6783
6784         return err;
6785 }
6786
6787 /* Enable usage tracking for all quota types. */
6788 int ext4_enable_quotas(struct super_block *sb)
6789 {
6790         int type, err = 0;
6791         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
6792                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
6793                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6794                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
6795         };
6796         bool quota_mopt[EXT4_MAXQUOTAS] = {
6797                 test_opt(sb, USRQUOTA),
6798                 test_opt(sb, GRPQUOTA),
6799                 test_opt(sb, PRJQUOTA),
6800         };
6801
6802         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
6803         for (type = 0; type < EXT4_MAXQUOTAS; type++) {
6804                 if (qf_inums[type]) {
6805                         err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
6806                                 DQUOT_USAGE_ENABLED |
6807                                 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
6808                         if (err) {
6809                                 ext4_warning(sb,
6810                                         "Failed to enable quota tracking "
6811                                         "(type=%d, err=%d). Please run "
6812                                         "e2fsck to fix.", type, err);
6813                                 for (type--; type >= 0; type--) {
6814                                         struct inode *inode;
6815
6816                                         inode = sb_dqopt(sb)->files[type];
6817                                         if (inode)
6818                                                 inode = igrab(inode);
6819                                         dquot_quota_off(sb, type);
6820                                         if (inode) {
6821                                                 lockdep_set_quota_inode(inode,
6822                                                         I_DATA_SEM_NORMAL);
6823                                                 iput(inode);
6824                                         }
6825                                 }
6826
6827                                 return err;
6828                         }
6829                 }
6830         }
6831         return 0;
6832 }
6833
6834 static int ext4_quota_off(struct super_block *sb, int type)
6835 {
6836         struct inode *inode = sb_dqopt(sb)->files[type];
6837         handle_t *handle;
6838         int err;
6839
6840         /* Force all delayed allocation blocks to be allocated.
6841          * Caller already holds s_umount sem */
6842         if (test_opt(sb, DELALLOC))
6843                 sync_filesystem(sb);
6844
6845         if (!inode || !igrab(inode))
6846                 goto out;
6847
6848         err = dquot_quota_off(sb, type);
6849         if (err || ext4_has_feature_quota(sb))
6850                 goto out_put;
6851
6852         inode_lock(inode);
6853         /*
6854          * Update modification times of quota files when userspace can
6855          * start looking at them. If we fail, we return success anyway since
6856          * this is not a hard failure and quotas are already disabled.
6857          */
6858         handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6859         if (IS_ERR(handle)) {
6860                 err = PTR_ERR(handle);
6861                 goto out_unlock;
6862         }
6863         EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
6864         inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
6865         inode->i_mtime = inode->i_ctime = current_time(inode);
6866         err = ext4_mark_inode_dirty(handle, inode);
6867         ext4_journal_stop(handle);
6868 out_unlock:
6869         inode_unlock(inode);
6870 out_put:
6871         lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
6872         iput(inode);
6873         return err;
6874 out:
6875         return dquot_quota_off(sb, type);
6876 }
6877
6878 /* Read data from quotafile - avoid pagecache and such because we cannot afford
6879  * acquiring the locks... As quota files are never truncated and quota code
6880  * itself serializes the operations (and no one else should touch the files)
6881  * we don't have to be afraid of races */
6882 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
6883                                size_t len, loff_t off)
6884 {
6885         struct inode *inode = sb_dqopt(sb)->files[type];
6886         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6887         int offset = off & (sb->s_blocksize - 1);
6888         int tocopy;
6889         size_t toread;
6890         struct buffer_head *bh;
6891         loff_t i_size = i_size_read(inode);
6892
6893         if (off > i_size)
6894                 return 0;
6895         if (off+len > i_size)
6896                 len = i_size-off;
6897         toread = len;
6898         while (toread > 0) {
6899                 tocopy = sb->s_blocksize - offset < toread ?
6900                                 sb->s_blocksize - offset : toread;
6901                 bh = ext4_bread(NULL, inode, blk, 0);
6902                 if (IS_ERR(bh))
6903                         return PTR_ERR(bh);
6904                 if (!bh)        /* A hole? */
6905                         memset(data, 0, tocopy);
6906                 else
6907                         memcpy(data, bh->b_data+offset, tocopy);
6908                 brelse(bh);
6909                 offset = 0;
6910                 toread -= tocopy;
6911                 data += tocopy;
6912                 blk++;
6913         }
6914         return len;
6915 }
6916
6917 /* Write to quotafile (we know the transaction is already started and has
6918  * enough credits) */
6919 static ssize_t ext4_quota_write(struct super_block *sb, int type,
6920                                 const char *data, size_t len, loff_t off)
6921 {
6922         struct inode *inode = sb_dqopt(sb)->files[type];
6923         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
6924         int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
6925         int retries = 0;
6926         struct buffer_head *bh;
6927         handle_t *handle = journal_current_handle();
6928
6929         if (!handle) {
6930                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6931                         " cancelled because transaction is not started",
6932                         (unsigned long long)off, (unsigned long long)len);
6933                 return -EIO;
6934         }
6935         /*
6936          * Since we account only one data block in transaction credits,
6937          * then it is impossible to cross a block boundary.
6938          */
6939         if (sb->s_blocksize - offset < len) {
6940                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6941                         " cancelled because not block aligned",
6942                         (unsigned long long)off, (unsigned long long)len);
6943                 return -EIO;
6944         }
6945
6946         do {
6947                 bh = ext4_bread(handle, inode, blk,
6948                                 EXT4_GET_BLOCKS_CREATE |
6949                                 EXT4_GET_BLOCKS_METADATA_NOFAIL);
6950         } while (PTR_ERR(bh) == -ENOSPC &&
6951                  ext4_should_retry_alloc(inode->i_sb, &retries));
6952         if (IS_ERR(bh))
6953                 return PTR_ERR(bh);
6954         if (!bh)
6955                 goto out;
6956         BUFFER_TRACE(bh, "get write access");
6957         err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
6958         if (err) {
6959                 brelse(bh);
6960                 return err;
6961         }
6962         lock_buffer(bh);
6963         memcpy(bh->b_data+offset, data, len);
6964         flush_dcache_page(bh->b_page);
6965         unlock_buffer(bh);
6966         err = ext4_handle_dirty_metadata(handle, NULL, bh);
6967         brelse(bh);
6968 out:
6969         if (inode->i_size < off + len) {
6970                 i_size_write(inode, off + len);
6971                 EXT4_I(inode)->i_disksize = inode->i_size;
6972                 err2 = ext4_mark_inode_dirty(handle, inode);
6973                 if (unlikely(err2 && !err))
6974                         err = err2;
6975         }
6976         return err ? err : len;
6977 }
6978 #endif
6979
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register ext2_fs_type; a failure is only logged, not propagated. */
static inline void register_as_ext2(void)
{
        int err = register_filesystem(&ext2_fs_type);

        if (!err)
                return;

        printk(KERN_WARNING
               "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

/* Undo register_as_ext2(). */
static inline void unregister_as_ext2(void)
{
        unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 if @sb carries only features acceptable for an ext2 mount,
 * 0 otherwise. Unknown incompat features always fail; unknown ro_compat
 * features fail only when the superblock is not read-only.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
        if (ext4_has_unknown_ext2_incompat_features(sb))
                return 0;

        return sb_rdonly(sb) ||
               !ext4_has_unknown_ext2_ro_compat_features(sb);
}
#else
/* Stubs used when the condition above is false. */
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
        return 0;
}
#endif
7009
7010 static inline void register_as_ext3(void)
7011 {
7012         int err = register_filesystem(&ext3_fs_type);
7013         if (err)
7014                 printk(KERN_WARNING
7015                        "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7016 }
7017
7018 static inline void unregister_as_ext3(void)
7019 {
7020         unregister_filesystem(&ext3_fs_type);
7021 }
7022
/*
 * Return 1 if @sb carries only features acceptable for an ext3 mount,
 * 0 otherwise. Unknown incompat features or a missing journal feature
 * always fail; unknown ro_compat features fail only when the superblock is
 * not read-only.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
        if (ext4_has_unknown_ext3_incompat_features(sb) ||
            !ext4_has_feature_journal(sb))
                return 0;

        return sb_rdonly(sb) ||
               !ext4_has_unknown_ext3_ro_compat_features(sb);
}
7035
7036 static struct file_system_type ext4_fs_type = {
7037         .owner                  = THIS_MODULE,
7038         .name                   = "ext4",
7039         .init_fs_context        = ext4_init_fs_context,
7040         .parameters             = ext4_param_specs,
7041         .kill_sb                = kill_block_super,
7042         .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
7043 };
7044 MODULE_ALIAS_FS("ext4");
7045
/*
 * Shared across all ext4 file systems.
 *
 * Array of EXT4_WQ_HASH_SZ wait queue heads; each entry is initialised by
 * ext4_init_fs().
 */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
7048
7049 static int __init ext4_init_fs(void)
7050 {
7051         int i, err;
7052
7053         ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
7054         ext4_li_info = NULL;
7055
7056         /* Build-time check for flags consistency */
7057         ext4_check_flag_values();
7058
7059         for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
7060                 init_waitqueue_head(&ext4__ioend_wq[i]);
7061
7062         err = ext4_init_es();
7063         if (err)
7064                 return err;
7065
7066         err = ext4_init_pending();
7067         if (err)
7068                 goto out7;
7069
7070         err = ext4_init_post_read_processing();
7071         if (err)
7072                 goto out6;
7073
7074         err = ext4_init_pageio();
7075         if (err)
7076                 goto out5;
7077
7078         err = ext4_init_system_zone();
7079         if (err)
7080                 goto out4;
7081
7082         err = ext4_init_sysfs();
7083         if (err)
7084                 goto out3;
7085
7086         err = ext4_init_mballoc();
7087         if (err)
7088                 goto out2;
7089         err = init_inodecache();
7090         if (err)
7091                 goto out1;
7092
7093         err = ext4_fc_init_dentry_cache();
7094         if (err)
7095                 goto out05;
7096
7097         register_as_ext3();
7098         register_as_ext2();
7099         err = register_filesystem(&ext4_fs_type);
7100         if (err)
7101                 goto out;
7102
7103         return 0;
7104 out:
7105         unregister_as_ext2();
7106         unregister_as_ext3();
7107         ext4_fc_destroy_dentry_cache();
7108 out05:
7109         destroy_inodecache();
7110 out1:
7111         ext4_exit_mballoc();
7112 out2:
7113         ext4_exit_sysfs();
7114 out3:
7115         ext4_exit_system_zone();
7116 out4:
7117         ext4_exit_pageio();
7118 out5:
7119         ext4_exit_post_read_processing();
7120 out6:
7121         ext4_exit_pending();
7122 out7:
7123         ext4_exit_es();
7124
7125         return err;
7126 }
7127
/*
 * Module exit: destroy the lazy-init thread, unregister the ext2, ext3 and
 * ext4 filesystem types, then tear down the caches and subsystems that
 * ext4_init_fs() set up.
 *
 * The call order is significant; keep it as is.
 */
static void __exit ext4_exit_fs(void)
{
        ext4_destroy_lazyinit_thread();
        /* Stop new mounts before freeing what mounted filesystems use. */
        unregister_as_ext2();
        unregister_as_ext3();
        unregister_filesystem(&ext4_fs_type);
        /* Teardown counterparts of the ext4_init_fs() setup calls. */
        ext4_fc_destroy_dentry_cache();
        destroy_inodecache();
        ext4_exit_mballoc();
        ext4_exit_sysfs();
        ext4_exit_system_zone();
        ext4_exit_pageio();
        ext4_exit_post_read_processing();
        ext4_exit_es();
        ext4_exit_pending();
}
7144
/* Module metadata, soft dependency on crc32c, and init/exit entry points. */
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)