// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fsnotify.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

#include "ext4.h"
#include "ext4_extents.h"       /* Needed for trace points definition */
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"
#include "fsmap.h"

#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

static struct ext4_lazy_init *ext4_li_info;
static DEFINE_MUTEX(ext4_li_mtx);
static struct ratelimit_state ext4_mount_msg_ratelimit;

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static void ext4_update_super(struct super_block *sb);
static int ext4_commit_super(struct super_block *sb);
static int ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
static int ext4_clear_journal_err(struct super_block *sb,
                                  struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
                                            unsigned int journal_inum);
static int ext4_validate_options(struct fs_context *fc);
static int ext4_check_opt_consistency(struct fs_context *fc,
                                      struct super_block *sb);
static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
static void ext4_fc_free(struct fs_context *fc);
static int ext4_init_fs_context(struct fs_context *fc);
static void ext4_kill_sb(struct super_block *sb);
static const struct fs_parameter_spec ext4_param_specs[];

/*
 * Lock ordering
 *
 * page fault path:
 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 *   -> page lock -> i_data_sem (rw)
 *
 * buffered write path:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> page lock ->
 *   i_data_sem (rw)
 *
 * truncate:
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 *   page lock
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
 *   i_data_sem (rw)
 *
 * direct IO:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 *
 * writepages:
 * transaction start -> page lock(s) -> i_data_sem (rw)
 */

static const struct fs_context_operations ext4_context_ops = {
        .parse_param    = ext4_parse_param,
        .get_tree       = ext4_get_tree,
        .reconfigure    = ext4_reconfigure,
        .free           = ext4_fc_free,
};


#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "ext2",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
        .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif


static struct file_system_type ext3_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "ext3",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
        .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)

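/*
 * Common helper that actually submits the buffer read.  The buffer must be
 * locked by the caller.  When @simu_fail is set, no I/O is submitted at all:
 * the buffer is left !uptodate and unlocked, simulating a read failure.
 */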
static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
                                  bh_end_io_t *end_io, bool simu_fail)
{
        if (simu_fail) {
                clear_buffer_uptodate(bh);
                unlock_buffer(bh);
                return;
        }

        /*
         * The buffer's verified bit is no longer valid after the buffer is
         * reread from disk following a write-out error; clear it to make
         * sure the contents get rechecked.
         */
        clear_buffer_verified(bh);

        bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
        get_bh(bh);
        submit_bh(REQ_OP_READ | op_flags, bh);
}

void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
                         bh_end_io_t *end_io, bool simu_fail)
{
        BUG_ON(!buffer_locked(bh));

        if (ext4_buffer_uptodate(bh)) {
                unlock_buffer(bh);
                return;
        }
        __ext4_read_bh(bh, op_flags, end_io, simu_fail);
}

int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
                 bh_end_io_t *end_io, bool simu_fail)
{
        BUG_ON(!buffer_locked(bh));

        if (ext4_buffer_uptodate(bh)) {
                unlock_buffer(bh);
                return 0;
        }

        __ext4_read_bh(bh, op_flags, end_io, simu_fail);

        wait_on_buffer(bh);
        if (buffer_uptodate(bh))
                return 0;
        return -EIO;
}

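/*
 * Lock the buffer and read it.  With @wait false the read is submitted
 * asynchronously and 0 is returned immediately; with @wait true the call
 * blocks until the read completes and returns 0 or -EIO accordingly.
 */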
int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
        lock_buffer(bh);
        if (!wait) {
                ext4_read_bh_nowait(bh, op_flags, NULL, false);
                return 0;
        }
        return ext4_read_bh(bh, op_flags, NULL, false);
}

/*
 * This works like __bread_gfp() except it uses ERR_PTR for error
 * returns.  With sb_bread() it is currently impossible to distinguish
 * between ENOMEM and EIO situations (since both result in a NULL
 * return).
 */
static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
                                               sector_t block,
                                               blk_opf_t op_flags, gfp_t gfp)
{
        struct buffer_head *bh;
        int ret;

        bh = sb_getblk_gfp(sb, block, gfp);
        if (bh == NULL)
                return ERR_PTR(-ENOMEM);
        if (ext4_buffer_uptodate(bh))
                return bh;

        ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
        if (ret) {
                put_bh(bh);
                return ERR_PTR(ret);
        }
        return bh;
}

struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
                                   blk_opf_t op_flags)
{
        gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
                        ~__GFP_FS) | __GFP_MOVABLE;

        return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
}

struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
                                            sector_t block)
{
        gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
                        ~__GFP_FS);

        return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}

void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
        struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
                        sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN);

        if (likely(bh)) {
                if (trylock_buffer(bh))
                        ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false);
                brelse(bh);
        }
}

static int ext4_verify_csum_type(struct super_block *sb,
                                 struct ext4_super_block *es)
{
        if (!ext4_has_feature_metadata_csum(sb))
                return 1;

        return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}

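/*
 * The superblock checksum is computed over every byte of the superblock up
 * to, but not including, the s_checksum field itself.
 */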
__le32 ext4_superblock_csum(struct ext4_super_block *es)
{
        int offset = offsetof(struct ext4_super_block, s_checksum);
        __u32 csum;

        csum = ext4_chksum(~0, (char *)es, offset);

        return cpu_to_le32(csum);
}

static int ext4_superblock_csum_verify(struct super_block *sb,
                                       struct ext4_super_block *es)
{
        if (!ext4_has_feature_metadata_csum(sb))
                return 1;

        return es->s_checksum == ext4_superblock_csum(es);
}

void ext4_superblock_csum_set(struct super_block *sb)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;

        if (!ext4_has_feature_metadata_csum(sb))
                return;

        es->s_checksum = ext4_superblock_csum(es);
}

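/*
 * Block group descriptor accessors.  With the 64-bit descriptor layout
 * (EXT4_DESC_SIZE() >= EXT4_MIN_DESC_SIZE_64BIT), each on-disk value is
 * split into _lo and _hi halves which the helpers below combine; otherwise
 * only the _lo half is used.
 */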
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
                               struct ext4_group_desc *bg)
{
        return le32_to_cpu(bg->bg_block_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
                               struct ext4_group_desc *bg)
{
        return le32_to_cpu(bg->bg_inode_bitmap_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                              struct ext4_group_desc *bg)
{
        return le32_to_cpu(bg->bg_inode_table_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

__u32 ext4_free_group_clusters(struct super_block *sb,
                               struct ext4_group_desc *bg)
{
        return le16_to_cpu(bg->bg_free_blocks_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
}

__u32 ext4_free_inodes_count(struct super_block *sb,
                              struct ext4_group_desc *bg)
{
        return le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_lo)) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (__u32)le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_hi)) << 16 : 0);
}

__u32 ext4_used_dirs_count(struct super_block *sb,
                              struct ext4_group_desc *bg)
{
        return le16_to_cpu(bg->bg_used_dirs_count_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
}

__u32 ext4_itable_unused_count(struct super_block *sb,
                              struct ext4_group_desc *bg)
{
        return le16_to_cpu(bg->bg_itable_unused_lo) |
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
}

void ext4_block_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
        bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
        bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_table_set(struct super_block *sb,
                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
        bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

void ext4_free_group_clusters_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, __u32 count)
{
        bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

void ext4_free_inodes_set(struct super_block *sb,
                          struct ext4_group_desc *bg, __u32 count)
{
        WRITE_ONCE(bg->bg_free_inodes_count_lo, cpu_to_le16((__u16)count));
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                WRITE_ONCE(bg->bg_free_inodes_count_hi, cpu_to_le16(count >> 16));
}

void ext4_used_dirs_set(struct super_block *sb,
                          struct ext4_group_desc *bg, __u32 count)
{
        bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

void ext4_itable_unused_set(struct super_block *sb,
                          struct ext4_group_desc *bg, __u32 count)
{
        bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
        if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
                bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

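/*
 * Superblock timestamps are stored as 40-bit values: a 32-bit _lo word
 * plus an 8-bit _hi byte, so values are clamped to the range [0, 2^40).
 */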
static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
        now = clamp_val(now, 0, (1ull << 40) - 1);

        *lo = cpu_to_le32(lower_32_bits(now));
        *hi = upper_32_bits(now);
}

static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
        return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
        __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
                             ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
        __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)

/*
 * The ext4_maybe_update_superblock() function checks and updates the
 * superblock if needed.
 *
 * This function is designed to update the on-disk superblock only under
 * certain conditions to prevent excessive disk writes and unnecessary
 * waking of the disk from sleep.  The superblock will be updated only if
 * both of the following hold:
 * 1. More than sbi->s_sb_update_sec (default: 1 hour) has passed since the
 *    last superblock update.
 * 2. More than sbi->s_sb_update_kb kilobytes (default: 16MB) have been
 *    written since the last superblock update.
 *
 * @sb: The superblock
 */
static void ext4_maybe_update_superblock(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        journal_t *journal = sbi->s_journal;
        time64_t now;
        __u64 last_update;
        __u64 lifetime_write_kbytes;
        __u64 diff_size;

        if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
            !(sb->s_flags & SB_ACTIVE) || !journal ||
            journal->j_flags & JBD2_UNMOUNT)
                return;

        now = ktime_get_real_seconds();
        last_update = ext4_get_tstamp(es, s_wtime);

        if (likely(now - last_update < sbi->s_sb_update_sec))
                return;

        lifetime_write_kbytes = sbi->s_kbytes_written +
                ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
                  sbi->s_sectors_written_start) >> 1);

        /*
         * Compute the number of kilobytes written since the last
         * superblock commit and compare it against the threshold
         * (a multiple of 16 MB by default).  This determines when the
         * next superblock commit should occur, i.e. not more often than
         * once per 16MB if less than that was written within an hour.
         */
        diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);

        if (diff_size > sbi->s_sb_update_kb)
                schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}

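/*
 * jbd2 commit callback: after each transaction commit, release the data
 * blocks freed in that transaction and possibly schedule a superblock
 * update.
 */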
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
        struct super_block              *sb = journal->j_private;

        BUG_ON(txn->t_state == T_FINISHED);

        ext4_process_freed_data(sb, txn->t_tid);
        ext4_maybe_update_superblock(sb);
}

static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
                struct folio *folio)
{
        struct buffer_head *bh, *head;
        struct journal_head *jh;

        bh = head = folio_buffers(folio);
        do {
                /*
                 * We have to redirty a page in these cases:
                 * 1) If buffer is dirty, it means the page was dirty because it
                 * contains a buffer that needs checkpointing. So the dirty bit
                 * needs to be preserved so that checkpointing writes the buffer
                 * properly.
                 * 2) If buffer is not part of the committing transaction
                 * (we may have just accidentally come across this buffer because
                 * inode range tracking is not exact) or if the currently running
                 * transaction already contains this buffer as well, dirty bit
                 * needs to be preserved so that the buffer gets writeprotected
                 * properly on running transaction's commit.
                 */
                jh = bh2jh(bh);
                if (buffer_dirty(bh) ||
                    (jh && (jh->b_transaction != jinode->i_transaction ||
                            jh->b_next_transaction)))
                        return true;
        } while ((bh = bh->b_this_page) != head);

        return false;
}

static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
        struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
        struct writeback_control wbc = {
                .sync_mode =  WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .range_start = jinode->i_dirty_start,
                .range_end = jinode->i_dirty_end,
        };
        struct folio *folio = NULL;
        int error;

        /*
         * writeback_iter() already checks for dirty folios and calls
         * folio_clear_dirty_for_io(), which is exactly what we need in
         * order to write-protect the folios.
         *
         * However, we may sometimes have to redirty a folio.
         */
        while ((folio = writeback_iter(mapping, &wbc, folio, &error))) {
                if (ext4_journalled_writepage_needs_redirty(jinode, folio))
                        folio_redirty_for_writepage(&wbc, folio);
                folio_unlock(folio);
        }

        return error;
}

static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
        int ret;

        if (ext4_should_journal_data(jinode->i_vfs_inode))
                ret = ext4_journalled_submit_inode_data_buffers(jinode);
        else
                ret = ext4_normal_submit_inode_data_buffers(jinode);
        return ret;
}

static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
        int ret = 0;

        if (!ext4_should_journal_data(jinode->i_vfs_inode))
                ret = jbd2_journal_finish_inode_data_buffers(jinode);

        return ret;
}

static bool system_going_down(void)
{
        return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
                || system_state == SYSTEM_RESTART;
}

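/*
 * Translation table from kernel errnos to the compact EXT4_ERR_* codes
 * used when error information is recorded in the superblock.
 */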
struct ext4_err_translation {
        int code;
        int errno;
};

#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

static struct ext4_err_translation err_translation[] = {
        EXT4_ERR_TRANSLATE(EIO),
        EXT4_ERR_TRANSLATE(ENOMEM),
        EXT4_ERR_TRANSLATE(EFSBADCRC),
        EXT4_ERR_TRANSLATE(EFSCORRUPTED),
        EXT4_ERR_TRANSLATE(ENOSPC),
        EXT4_ERR_TRANSLATE(ENOKEY),
        EXT4_ERR_TRANSLATE(EROFS),
        EXT4_ERR_TRANSLATE(EFBIG),
        EXT4_ERR_TRANSLATE(EEXIST),
        EXT4_ERR_TRANSLATE(ERANGE),
        EXT4_ERR_TRANSLATE(EOVERFLOW),
        EXT4_ERR_TRANSLATE(EBUSY),
        EXT4_ERR_TRANSLATE(ENOTDIR),
        EXT4_ERR_TRANSLATE(ENOTEMPTY),
        EXT4_ERR_TRANSLATE(ESHUTDOWN),
        EXT4_ERR_TRANSLATE(EFAULT),
};

static int ext4_errno_to_code(int errno)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(err_translation); i++)
                if (err_translation[i].errno == errno)
                        return err_translation[i].code;
        return EXT4_ERR_UNKNOWN;
}

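/*
 * Record the details of an error in the in-memory superblock info,
 * tracking both the first error seen since mount and the most recent one.
 * The saved information is written out to the on-disk superblock later.
 */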
static void save_error_info(struct super_block *sb, int error,
                            __u32 ino, __u64 block,
                            const char *func, unsigned int line)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);

        /* We default to EFSCORRUPTED error... */
        if (error == 0)
                error = EFSCORRUPTED;

        spin_lock(&sbi->s_error_lock);
        sbi->s_add_error_count++;
        sbi->s_last_error_code = error;
        sbi->s_last_error_line = line;
        sbi->s_last_error_ino = ino;
        sbi->s_last_error_block = block;
        sbi->s_last_error_func = func;
        sbi->s_last_error_time = ktime_get_real_seconds();
        if (!sbi->s_first_error_time) {
                sbi->s_first_error_code = error;
                sbi->s_first_error_line = line;
                sbi->s_first_error_ino = ino;
                sbi->s_first_error_block = block;
                sbi->s_first_error_func = func;
                sbi->s_first_error_time = sbi->s_last_error_time;
        }
        spin_unlock(&sbi->s_error_lock);
}

/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock.  That is not possible on ext4, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
 *
 * If force_ro is set, we unconditionally force the filesystem into an
 * ABORT|READONLY state, unless the error response on the fs has been set to
 * panic in which case we take the easy way out and panic immediately. This is
 * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
 * at a critical moment in log management.
 */
static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
                              __u32 ino, __u64 block,
                              const char *func, unsigned int line)
{
        journal_t *journal = EXT4_SB(sb)->s_journal;
        bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

        EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
        if (test_opt(sb, WARN_ON_ERROR))
                WARN_ON_ONCE(1);

        if (!continue_fs && !ext4_emergency_ro(sb) && journal)
                jbd2_journal_abort(journal, -EIO);

        if (!bdev_read_only(sb->s_bdev)) {
                save_error_info(sb, error, ino, block, func, line);
                /*
                 * In case the fs should keep running, we need to write out
                 * the superblock through the journal. Due to lock ordering
                 * constraints, it may not be safe to do it right here so we
                 * defer superblock flushing to a workqueue. We just need to be
                 * careful when the journal is already shutting down. If we get
                 * here in that case, just update the sb directly as the last
                 * transaction won't commit anyway.
                 */
                if (continue_fs && journal &&
                    !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
                        schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
                else
                        ext4_commit_super(sb);
        }

        /*
         * We force ERRORS_RO behavior when the system is rebooting. Otherwise
         * we could panic during 'reboot -f' because the underlying device may
         * already have been disabled.
         */
        if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
        }

        if (ext4_emergency_ro(sb) || continue_fs)
                return;

        ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
        /*
         * We don't set SB_RDONLY because that requires the sb->s_umount
         * semaphore, and setting it without the proper remount procedure
         * confuses code such as freeze_super(), leading to deadlocks and
         * other problems.
         */
        set_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
}

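/*
 * Workqueue callback behind sbi->s_sb_upd_work: flush the superblock,
 * going through the journal while it is running and falling back to a
 * direct write otherwise.
 */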
static void update_super_work(struct work_struct *work)
{
        struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
                                                s_sb_upd_work);
        journal_t *journal = sbi->s_journal;
        handle_t *handle;

        /*
         * If the journal is still running, we have to write out the
         * superblock through the journal to avoid collisions with other
         * journalled superblock updates.
         *
         * We use jbd2 functions directly here to avoid recursing back into
         * ext4 error handling code while handling previous errors.
         */
        if (!ext4_emergency_state(sbi->s_sb) &&
            !sb_rdonly(sbi->s_sb) && journal) {
                struct buffer_head *sbh = sbi->s_sbh;
                bool call_notify_err = false;

                handle = jbd2_journal_start(journal, 1);
                if (IS_ERR(handle))
                        goto write_directly;
                if (jbd2_journal_get_write_access(handle, sbh)) {
                        jbd2_journal_stop(handle);
                        goto write_directly;
                }

                if (sbi->s_add_error_count > 0)
                        call_notify_err = true;

                ext4_update_super(sbi->s_sb);
                if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
                        ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
                                 "superblock detected");
                        clear_buffer_write_io_error(sbh);
                        set_buffer_uptodate(sbh);
                }

                if (jbd2_journal_dirty_metadata(handle, sbh)) {
                        jbd2_journal_stop(handle);
                        goto write_directly;
                }
                jbd2_journal_stop(handle);

                if (call_notify_err)
                        ext4_notify_error_sysfs(sbi);

                return;
        }
write_directly:
        /*
         * Writing through the journal failed. Write the sb directly to get
         * the error info out and hope for the best.
         */
        ext4_commit_super(sbi->s_sb);
        ext4_notify_error_sysfs(sbi);
}

#define ext4_error_ratelimit(sb)                                        \
                ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
                             "EXT4-fs error")

void __ext4_error(struct super_block *sb, const char *function,
                  unsigned int line, bool force_ro, int error, __u64 block,
                  const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        if (unlikely(ext4_emergency_state(sb)))
                return;

        trace_ext4_error(sb, function, line);
        if (ext4_error_ratelimit(sb)) {
                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                printk(KERN_CRIT
                       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
                       sb->s_id, function, line, current->comm, &vaf);
                va_end(args);
        }
        fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);

        ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}

void __ext4_error_inode(struct inode *inode, const char *function,
                        unsigned int line, ext4_fsblk_t block, int error,
                        const char *fmt, ...)
{
        va_list args;
        struct va_format vaf;

        if (unlikely(ext4_emergency_state(inode->i_sb)))
                return;

        trace_ext4_error(inode->i_sb, function, line);
        if (ext4_error_ratelimit(inode->i_sb)) {
                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                if (block)
                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
                               "inode #%lu: block %llu: comm %s: %pV\n",
                               inode->i_sb->s_id, function, line, inode->i_ino,
                               block, current->comm, &vaf);
                else
                        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
                               "inode #%lu: comm %s: %pV\n",
                               inode->i_sb->s_id, function, line, inode->i_ino,
                               current->comm, &vaf);
                va_end(args);
        }
        fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);

        ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
                          function, line);
}

void __ext4_error_file(struct file *file, const char *function,
                       unsigned int line, ext4_fsblk_t block,
                       const char *fmt, ...)
{
        va_list args;
        struct va_format vaf;
        struct inode *inode = file_inode(file);
        char pathname[80], *path;

        if (unlikely(ext4_emergency_state(inode->i_sb)))
                return;

        trace_ext4_error(inode->i_sb, function, line);
        if (ext4_error_ratelimit(inode->i_sb)) {
                path = file_path(file, pathname, sizeof(pathname));
                if (IS_ERR(path))
                        path = "(unknown)";
                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                if (block)
                        printk(KERN_CRIT
                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
                               "block %llu: comm %s: path %s: %pV\n",
                               inode->i_sb->s_id, function, line, inode->i_ino,
                               block, current->comm, path, &vaf);
                else
                        printk(KERN_CRIT
                               "EXT4-fs error (device %s): %s:%d: inode #%lu: "
                               "comm %s: path %s: %pV\n",
                               inode->i_sb->s_id, function, line, inode->i_ino,
                               current->comm, path, &vaf);
                va_end(args);
        }
        fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);

        ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
                          function, line);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
                              char nbuf[16])
{
        char *errstr = NULL;

        switch (errno) {
        case -EFSCORRUPTED:
                errstr = "Corrupt filesystem";
                break;
        case -EFSBADCRC:
                errstr = "Filesystem failed CRC";
                break;
        case -EIO:
                errstr = "IO failure";
                break;
        case -ENOMEM:
                errstr = "Out of memory";
                break;
        case -EROFS:
                if (!sb || (EXT4_SB(sb)->s_journal &&
                            EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
                        errstr = "Journal has aborted";
                else
                        errstr = "Readonly filesystem";
                break;
        default:
                /* If the caller passed in an extra buffer for unknown
                 * errors, textualise them now.  Else we just return
                 * NULL. */
                if (nbuf) {
                        /* Check for truncated error codes... */
                        if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
                                errstr = nbuf;
                }
                break;
        }

        return errstr;
}

/* __ext4_std_error decodes expected errors from journaling functions
 * automatically and invokes the appropriate error response.  */

void __ext4_std_error(struct super_block *sb, const char *function,
                      unsigned int line, int errno)
{
        char nbuf[16];
        const char *errstr;

        if (unlikely(ext4_emergency_state(sb)))
                return;

        /* Special case: if the error is EROFS, and we're not already
         * inside a transaction, then there's really no point in logging
         * an error. */
        if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
                return;

        if (ext4_error_ratelimit(sb)) {
                errstr = ext4_decode_error(sb, errno, nbuf);
                printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
                       sb->s_id, function, line, errstr);
        }
        fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);

        ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}

void __ext4_msg(struct super_block *sb,
                const char *prefix, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        if (sb) {
                atomic_inc(&EXT4_SB(sb)->s_msg_count);
                if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
                                  "EXT4-fs"))
                        return;
        }

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        if (sb)
                printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
        else
                printk("%sEXT4-fs: %pV\n", prefix, &vaf);
        va_end(args);
}

static int ext4_warning_ratelimit(struct super_block *sb)
{
        atomic_inc(&EXT4_SB(sb)->s_warning_count);
        return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
                            "EXT4-fs warning");
}

void __ext4_warning(struct super_block *sb, const char *function,
                    unsigned int line, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        if (!ext4_warning_ratelimit(sb))
                return;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
               sb->s_id, function, line, &vaf);
        va_end(args);
}

void __ext4_warning_inode(const struct inode *inode, const char *function,
                          unsigned int line, const char *fmt, ...)
{
        struct va_format vaf;
        va_list args;

        if (!ext4_warning_ratelimit(inode->i_sb))
                return;

        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
        printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
               "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
               function, line, inode->i_ino, current->comm, &vaf);
        va_end(args);
}

void __ext4_grp_locked_error(const char *function, unsigned int line,
                             struct super_block *sb, ext4_group_t grp,
                             unsigned long ino, ext4_fsblk_t block,
                             const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
        struct va_format vaf;
        va_list args;

        if (unlikely(ext4_emergency_state(sb)))
                return;

        trace_ext4_error(sb, function, line);
        if (ext4_error_ratelimit(sb)) {
                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;
                printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
                       sb->s_id, function, line, grp);
                if (ino)
                        printk(KERN_CONT "inode %lu: ", ino);
                if (block)
                        printk(KERN_CONT "block %llu:",
                               (unsigned long long) block);
                printk(KERN_CONT "%pV\n", &vaf);
                va_end(args);
        }

        if (test_opt(sb, ERRORS_CONT)) {
                if (test_opt(sb, WARN_ON_ERROR))
                        WARN_ON_ONCE(1);
                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                if (!bdev_read_only(sb->s_bdev)) {
                        save_error_info(sb, EFSCORRUPTED, ino, block, function,
                                        line);
                        schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
                }
                return;
        }
        ext4_unlock_group(sb, grp);
        ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
        /*
         * We only get here in the ERRORS_RO case; relocking the group
         * may be dangerous, but nothing bad will happen since the
         * filesystem will have already been marked read-only and the
         * journal has been aborted.
         */
        ext4_lock_group(sb, grp);
        return;
}

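/*
 * Mark a group's block and/or inode bitmap as corrupted.  The first time a
 * bitmap is marked, the group's free counts are subtracted from the
 * filesystem-wide percpu counters so the corrupted group no longer
 * contributes to the free-space statistics.
 */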
void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
                                     ext4_group_t group,
                                     unsigned int flags)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
        struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
        int ret;

        if (!grp || !gdp)
                return;
        if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
                                            &grp->bb_state);
                if (!ret)
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
                                           grp->bb_free);
        }

        if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
                ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
                                            &grp->bb_state);
                if (!ret && gdp) {
                        int count;

                        count = ext4_free_inodes_count(sb, gdp);
                        percpu_counter_sub(&sbi->s_freeinodes_counter,
                                           count);
                }
        }
}

void ext4_update_dynamic_rev(struct super_block *sb)
{
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;

        if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
                return;

        ext4_warning(sb,
                     "updating to rev %d because of new feature flag, "
                     "running e2fsck is recommended",
                     EXT4_DYNAMIC_REV);

        es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
        es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
        es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
        /* leave es->s_feature_*compat flags alone */
        /* es->s_uuid will be set by e2fsck if empty */

        /*
         * The rest of the superblock fields should be zero, and if not it
         * means they are likely already in use, so leave them alone.  We
         * can leave it up to e2fsck to clean up any inconsistencies there.
         */
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}

static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
        struct list_head *l;

        ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
                 le32_to_cpu(sbi->s_es->s_last_orphan));

        printk(KERN_ERR "sb_info orphan list:\n");
        list_for_each(l, &sbi->s_orphan) {
                struct inode *inode = orphan_list_entry(l);
                printk(KERN_ERR "  "
                       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
                       inode->i_sb->s_id, inode->i_ino, inode,
                       inode->i_mode, inode->i_nlink,
                       NEXT_ORPHAN(inode));
        }
}

#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

static inline void ext4_quotas_off(struct super_block *sb, int type)
{
        BUG_ON(type > EXT4_MAXQUOTAS);

        /* Use our quota_off function to clear inode flags etc. */
        for (type--; type >= 0; type--)
                ext4_quota_off(sb, type);
}

/*
 * This is a helper function which is used in the mount/remount
 * code paths (which hold s_umount) to fetch the quota file name.
 */
static inline char *get_qf_name(struct super_block *sb,
                                struct ext4_sb_info *sbi,
                                int type)
{
        return rcu_dereference_protected(sbi->s_qf_names[type],
                                         lockdep_is_held(&sb->s_umount));
}
#else
static inline void ext4_quotas_off(struct super_block *sb, int type)
{
}
#endif

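/*
 * Initialize the filesystem-wide percpu counters (free clusters, free
 * inodes, directory count, and friends) from the on-disk counts, and set
 * up s_writepages_rwsem.  Failure in any of these steps means we ran out
 * of memory.
 */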
static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
{
        ext4_fsblk_t block;
        int err;

        block = ext4_count_free_clusters(sbi->s_sb);
        ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
        err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
                                  GFP_KERNEL);
        if (!err) {
                unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
                sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
                err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
                                          GFP_KERNEL);
        }
        if (!err)
                err = percpu_counter_init(&sbi->s_dirs_counter,
                                          ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
        if (!err)
                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
                                          GFP_KERNEL);
        if (!err)
                err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
                                          GFP_KERNEL);
        if (!err)
                err = percpu_init_rwsem(&sbi->s_writepages_rwsem);

        if (err)
                ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");

        return err;
}

static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
{
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
        percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
        percpu_free_rwsem(&sbi->s_writepages_rwsem);
}

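/*
 * The group descriptor and flex group arrays are published via RCU, so
 * the teardown helpers below dereference them under rcu_read_lock()
 * before releasing the individual elements.
 */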
static void ext4_group_desc_free(struct ext4_sb_info *sbi)
{
        struct buffer_head **group_desc;
        int i;

        rcu_read_lock();
        group_desc = rcu_dereference(sbi->s_group_desc);
        for (i = 0; i < sbi->s_gdb_count; i++)
                brelse(group_desc[i]);
        kvfree(group_desc);
        rcu_read_unlock();
}

static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
{
        struct flex_groups **flex_groups;
        int i;

        rcu_read_lock();
        flex_groups = rcu_dereference(sbi->s_flex_groups);
        if (flex_groups) {
                for (i = 0; i < sbi->s_flex_groups_allocated; i++)
                        kvfree(flex_groups[i]);
                kvfree(flex_groups);
        }
        rcu_read_unlock();
}

static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        int aborted = 0;
        int err;

        /*
         * Unregister sysfs before destroying the jbd2 journal: the
         * attr_journal_task attribute could otherwise still be accessed
         * via sysfs while sbi->s_journal->j_task is already NULL.
         *
         * Likewise, unregister sysfs before flushing sbi->s_sb_upd_work:
         * a user reading /proc/fs/ext4/xx/mb_groups during umount may hit
         * a metadata verification failure, which queues the error work;
         * update_super_work would then call start_this_handle and could
         * trigger a BUG_ON.
         */
        ext4_unregister_sysfs(sb);

        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
                ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
                         &sb->s_uuid);

        ext4_unregister_li_request(sb);
        ext4_quotas_off(sb, EXT4_MAXQUOTAS);

        destroy_workqueue(sbi->rsv_conversion_wq);
        ext4_release_orphan_info(sb);

        if (sbi->s_journal) {
                aborted = is_journal_aborted(sbi->s_journal);
                err = ext4_journal_destroy(sbi, sbi->s_journal);
                if ((err < 0) && !aborted) {
                        ext4_abort(sb, -err, "Couldn't clean up the journal");
                }
        } else
                flush_work(&sbi->s_sb_upd_work);

        ext4_es_unregister_shrinker(sbi);
        timer_shutdown_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);

        if (!ext4_emergency_state(sb) && !sb_rdonly(sb)) {
                if (!aborted) {
                        ext4_clear_feature_journal_needs_recovery(sb);
                        ext4_clear_feature_orphan_present(sb);
                        es->s_state = cpu_to_le16(sbi->s_mount_state);
                }
                ext4_commit_super(sb);
        }

        ext4_group_desc_free(sbi);
        ext4_flex_groups_free(sbi);

        WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
                     percpu_counter_sum(&sbi->s_dirtyclusters_counter));
        ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
        for (int i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(get_qf_name(sb, sbi, i));
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->s_journal_bdev_file) {
                /*
                 * Invalidate the journal device's buffers.  We don't want
                 * them floating about in memory - the physical journal
                 * device may have been hotswapped, and it breaks the
                 * `ro-after' testing code.
                 */
                sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
                invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
        }

        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
        sbi->s_ea_inode_cache = NULL;

        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
        sbi->s_ea_block_cache = NULL;

        ext4_stop_mmpd(sbi);

        brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev, NULL);
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
        utf8_unload(sb->s_encoding);
#endif
        kfree(sbi);
}

1374
1375 static struct kmem_cache *ext4_inode_cachep;
1376
1377 /*
1378  * Called inside transaction, so use GFP_NOFS
1379  */
1380 static struct inode *ext4_alloc_inode(struct super_block *sb)
1381 {
1382         struct ext4_inode_info *ei;
1383
1384         ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
1385         if (!ei)
1386                 return NULL;
1387
1388         inode_set_iversion(&ei->vfs_inode, 1);
1389         ei->i_flags = 0;
1390         spin_lock_init(&ei->i_raw_lock);
1391         ei->i_prealloc_node = RB_ROOT;
1392         atomic_set(&ei->i_prealloc_active, 0);
1393         rwlock_init(&ei->i_prealloc_lock);
1394         ext4_es_init_tree(&ei->i_es_tree);
1395         rwlock_init(&ei->i_es_lock);
1396         INIT_LIST_HEAD(&ei->i_es_list);
1397         ei->i_es_all_nr = 0;
1398         ei->i_es_shk_nr = 0;
1399         ei->i_es_shrink_lblk = 0;
1400         ei->i_reserved_data_blocks = 0;
1401         spin_lock_init(&(ei->i_block_reservation_lock));
1402         ext4_init_pending_tree(&ei->i_pending_tree);
1403 #ifdef CONFIG_QUOTA
1404         ei->i_reserved_quota = 0;
1405         memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1406 #endif
1407         ei->jinode = NULL;
1408         INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1409         spin_lock_init(&ei->i_completed_io_lock);
1410         ei->i_sync_tid = 0;
1411         ei->i_datasync_tid = 0;
1412         INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1413         ext4_fc_init_inode(&ei->vfs_inode);
1414         spin_lock_init(&ei->i_fc_lock);
1415         return &ei->vfs_inode;
1416 }
1417
1418 static int ext4_drop_inode(struct inode *inode)
1419 {
1420         int drop = generic_drop_inode(inode);
1421
1422         if (!drop)
1423                 drop = fscrypt_drop_inode(inode);
1424
1425         trace_ext4_drop_inode(inode, drop);
1426         return drop;
1427 }
1428
1429 static void ext4_free_in_core_inode(struct inode *inode)
1430 {
1431         fscrypt_free_inode(inode);
1432         if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1433                 pr_warn("%s: inode %lu still in fc list",
1434                         __func__, inode->i_ino);
1435         }
1436         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1437 }
1438
1439 static void ext4_destroy_inode(struct inode *inode)
1440 {
1441         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1442                 ext4_msg(inode->i_sb, KERN_ERR,
1443                          "Inode %lu (%p): orphan list check failed!",
1444                          inode->i_ino, EXT4_I(inode));
1445                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1446                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
1447                                 true);
1448                 dump_stack();
1449         }
1450
1451         if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
1452             WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
1453                 ext4_msg(inode->i_sb, KERN_ERR,
1454                          "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1455                          inode->i_ino, EXT4_I(inode),
1456                          EXT4_I(inode)->i_reserved_data_blocks);
1457 }
1458
1459 static void ext4_shutdown(struct super_block *sb)
1460 {
1461         ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
1462 }
1463
1464 static void init_once(void *foo)
1465 {
1466         struct ext4_inode_info *ei = foo;
1467
1468         INIT_LIST_HEAD(&ei->i_orphan);
1469         init_rwsem(&ei->xattr_sem);
1470         init_rwsem(&ei->i_data_sem);
1471         inode_init_once(&ei->vfs_inode);
1472         ext4_fc_init_inode(&ei->vfs_inode);
1473 }
1474
1475 static int __init init_inodecache(void)
1476 {
1477         ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1478                                 sizeof(struct ext4_inode_info), 0,
1479                                 SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
1480                                 offsetof(struct ext4_inode_info, i_data),
1481                                 sizeof_field(struct ext4_inode_info, i_data),
1482                                 init_once);
1483         if (ext4_inode_cachep == NULL)
1484                 return -ENOMEM;
1485         return 0;
1486 }
1487
1488 static void destroy_inodecache(void)
1489 {
1490         /*
1491          * Make sure all delayed rcu free inodes are flushed before we
1492          * destroy cache.
1493          */
1494         rcu_barrier();
1495         kmem_cache_destroy(ext4_inode_cachep);
1496 }
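/*
 * Note (illustrative): inodes are freed through an RCU callback scheduled
 * by the VFS, which is what ends up invoking ->free_inode
 * (ext4_free_in_core_inode).  The rcu_barrier() above therefore waits for
 * every pending callback to finish before kmem_cache_destroy() tears the
 * slab cache down.
 */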
1497
1498 void ext4_clear_inode(struct inode *inode)
1499 {
1500         ext4_fc_del(inode);
1501         invalidate_inode_buffers(inode);
1502         clear_inode(inode);
1503         ext4_discard_preallocations(inode);
1504         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1505         dquot_drop(inode);
1506         if (EXT4_I(inode)->jinode) {
1507                 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1508                                                EXT4_I(inode)->jinode);
1509                 jbd2_free_inode(EXT4_I(inode)->jinode);
1510                 EXT4_I(inode)->jinode = NULL;
1511         }
1512         fscrypt_put_encryption_info(inode);
1513         fsverity_cleanup_inode(inode);
1514 }
1515
1516 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1517                                         u64 ino, u32 generation)
1518 {
1519         struct inode *inode;
1520
1521         /*
1522          * Currently we don't know the generation for parent directory, so
1523          * a generation of 0 means "accept any"
1524          */
1525         inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1526         if (IS_ERR(inode))
1527                 return ERR_CAST(inode);
1528         if (generation && inode->i_generation != generation) {
1529                 iput(inode);
1530                 return ERR_PTR(-ESTALE);
1531         }
1532
1533         return inode;
1534 }
1535
1536 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1537                                         int fh_len, int fh_type)
1538 {
1539         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1540                                     ext4_nfs_get_inode);
1541 }
1542
1543 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1544                                         int fh_len, int fh_type)
1545 {
1546         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1547                                     ext4_nfs_get_inode);
1548 }
1549
1550 static int ext4_nfs_commit_metadata(struct inode *inode)
1551 {
1552         struct writeback_control wbc = {
1553                 .sync_mode = WB_SYNC_ALL
1554         };
1555
1556         trace_ext4_nfs_commit_metadata(inode);
1557         return ext4_write_inode(inode, &wbc);
1558 }
1559
1560 #ifdef CONFIG_QUOTA
1561 static const char * const quotatypes[] = INITQFNAMES;
1562 #define QTYPE2NAME(t) (quotatypes[t])
1563
1564 static int ext4_write_dquot(struct dquot *dquot);
1565 static int ext4_acquire_dquot(struct dquot *dquot);
1566 static int ext4_release_dquot(struct dquot *dquot);
1567 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1568 static int ext4_write_info(struct super_block *sb, int type);
1569 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1570                          const struct path *path);
1571 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1572                                size_t len, loff_t off);
1573 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1574                                 const char *data, size_t len, loff_t off);
1575 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1576                              unsigned int flags);
1577
1578 static struct dquot __rcu **ext4_get_dquots(struct inode *inode)
1579 {
1580         return EXT4_I(inode)->i_dquot;
1581 }
1582
1583 static const struct dquot_operations ext4_quota_operations = {
1584         .get_reserved_space     = ext4_get_reserved_space,
1585         .write_dquot            = ext4_write_dquot,
1586         .acquire_dquot          = ext4_acquire_dquot,
1587         .release_dquot          = ext4_release_dquot,
1588         .mark_dirty             = ext4_mark_dquot_dirty,
1589         .write_info             = ext4_write_info,
1590         .alloc_dquot            = dquot_alloc,
1591         .destroy_dquot          = dquot_destroy,
1592         .get_projid             = ext4_get_projid,
1593         .get_inode_usage        = ext4_get_inode_usage,
1594         .get_next_id            = dquot_get_next_id,
1595 };
1596
1597 static const struct quotactl_ops ext4_qctl_operations = {
1598         .quota_on       = ext4_quota_on,
1599         .quota_off      = ext4_quota_off,
1600         .quota_sync     = dquot_quota_sync,
1601         .get_state      = dquot_get_state,
1602         .set_info       = dquot_set_dqinfo,
1603         .get_dqblk      = dquot_get_dqblk,
1604         .set_dqblk      = dquot_set_dqblk,
1605         .get_nextdqblk  = dquot_get_next_dqblk,
1606 };
1607 #endif
1608
1609 static const struct super_operations ext4_sops = {
1610         .alloc_inode    = ext4_alloc_inode,
1611         .free_inode     = ext4_free_in_core_inode,
1612         .destroy_inode  = ext4_destroy_inode,
1613         .write_inode    = ext4_write_inode,
1614         .dirty_inode    = ext4_dirty_inode,
1615         .drop_inode     = ext4_drop_inode,
1616         .evict_inode    = ext4_evict_inode,
1617         .put_super      = ext4_put_super,
1618         .sync_fs        = ext4_sync_fs,
1619         .freeze_fs      = ext4_freeze,
1620         .unfreeze_fs    = ext4_unfreeze,
1621         .statfs         = ext4_statfs,
1622         .show_options   = ext4_show_options,
1623         .shutdown       = ext4_shutdown,
1624 #ifdef CONFIG_QUOTA
1625         .quota_read     = ext4_quota_read,
1626         .quota_write    = ext4_quota_write,
1627         .get_dquots     = ext4_get_dquots,
1628 #endif
1629 };
1630
1631 static const struct export_operations ext4_export_ops = {
1632         .encode_fh = generic_encode_ino32_fh,
1633         .fh_to_dentry = ext4_fh_to_dentry,
1634         .fh_to_parent = ext4_fh_to_parent,
1635         .get_parent = ext4_get_parent,
1636         .commit_metadata = ext4_nfs_commit_metadata,
1637 };
1638
1639 enum {
1640         Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1641         Opt_resgid, Opt_resuid, Opt_sb,
1642         Opt_nouid32, Opt_debug, Opt_removed,
1643         Opt_user_xattr, Opt_acl,
1644         Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1645         Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1646         Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1647         Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1648         Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
1649         Opt_inlinecrypt,
1650         Opt_usrjquota, Opt_grpjquota, Opt_quota,
1651         Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1652         Opt_usrquota, Opt_grpquota, Opt_prjquota,
1653         Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
1654         Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
1655         Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
1656         Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1657         Opt_inode_readahead_blks, Opt_journal_ioprio,
1658         Opt_dioread_nolock, Opt_dioread_lock,
1659         Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1660         Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
1661         Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
1662         Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
1663 #ifdef CONFIG_EXT4_DEBUG
1664         Opt_fc_debug_max_replay, Opt_fc_debug_force
1665 #endif
1666 };
1667
1668 static const struct constant_table ext4_param_errors[] = {
1669         {"continue",    EXT4_MOUNT_ERRORS_CONT},
1670         {"panic",       EXT4_MOUNT_ERRORS_PANIC},
1671         {"remount-ro",  EXT4_MOUNT_ERRORS_RO},
1672         {}
1673 };
1674
1675 static const struct constant_table ext4_param_data[] = {
1676         {"journal",     EXT4_MOUNT_JOURNAL_DATA},
1677         {"ordered",     EXT4_MOUNT_ORDERED_DATA},
1678         {"writeback",   EXT4_MOUNT_WRITEBACK_DATA},
1679         {}
1680 };
1681
1682 static const struct constant_table ext4_param_data_err[] = {
1683         {"abort",       Opt_data_err_abort},
1684         {"ignore",      Opt_data_err_ignore},
1685         {}
1686 };
1687
1688 static const struct constant_table ext4_param_jqfmt[] = {
1689         {"vfsold",      QFMT_VFS_OLD},
1690         {"vfsv0",       QFMT_VFS_V0},
1691         {"vfsv1",       QFMT_VFS_V1},
1692         {}
1693 };
1694
1695 static const struct constant_table ext4_param_dax[] = {
1696         {"always",      Opt_dax_always},
1697         {"inode",       Opt_dax_inode},
1698         {"never",       Opt_dax_never},
1699         {}
1700 };
1701
1702 /*
1703  * Mount option specification
1704  * We don't use fsparam_flag_no because of the way we set the
1705  * options and the way we show them in _ext4_show_options(). To
1706  * keep the changes to a minimum, let's keep the negative options
1707  * separate for now.
1708  */
1709 static const struct fs_parameter_spec ext4_param_specs[] = {
1710         fsparam_flag    ("bsddf",               Opt_bsd_df),
1711         fsparam_flag    ("minixdf",             Opt_minix_df),
1712         fsparam_flag    ("grpid",               Opt_grpid),
1713         fsparam_flag    ("bsdgroups",           Opt_grpid),
1714         fsparam_flag    ("nogrpid",             Opt_nogrpid),
1715         fsparam_flag    ("sysvgroups",          Opt_nogrpid),
1716         fsparam_gid     ("resgid",              Opt_resgid),
1717         fsparam_uid     ("resuid",              Opt_resuid),
1718         fsparam_u32     ("sb",                  Opt_sb),
1719         fsparam_enum    ("errors",              Opt_errors, ext4_param_errors),
1720         fsparam_flag    ("nouid32",             Opt_nouid32),
1721         fsparam_flag    ("debug",               Opt_debug),
1722         fsparam_flag    ("oldalloc",            Opt_removed),
1723         fsparam_flag    ("orlov",               Opt_removed),
1724         fsparam_flag    ("user_xattr",          Opt_user_xattr),
1725         fsparam_flag    ("acl",                 Opt_acl),
1726         fsparam_flag    ("norecovery",          Opt_noload),
1727         fsparam_flag    ("noload",              Opt_noload),
1728         fsparam_flag    ("bh",                  Opt_removed),
1729         fsparam_flag    ("nobh",                Opt_removed),
1730         fsparam_u32     ("commit",              Opt_commit),
1731         fsparam_u32     ("min_batch_time",      Opt_min_batch_time),
1732         fsparam_u32     ("max_batch_time",      Opt_max_batch_time),
1733         fsparam_u32     ("journal_dev",         Opt_journal_dev),
1734         fsparam_bdev    ("journal_path",        Opt_journal_path),
1735         fsparam_flag    ("journal_checksum",    Opt_journal_checksum),
1736         fsparam_flag    ("nojournal_checksum",  Opt_nojournal_checksum),
1737         fsparam_flag    ("journal_async_commit",Opt_journal_async_commit),
1738         fsparam_flag    ("abort",               Opt_abort),
1739         fsparam_enum    ("data",                Opt_data, ext4_param_data),
1740         fsparam_enum    ("data_err",            Opt_data_err,
1741                                                 ext4_param_data_err),
1742         fsparam_string_empty
1743                         ("usrjquota",           Opt_usrjquota),
1744         fsparam_string_empty
1745                         ("grpjquota",           Opt_grpjquota),
1746         fsparam_enum    ("jqfmt",               Opt_jqfmt, ext4_param_jqfmt),
1747         fsparam_flag    ("grpquota",            Opt_grpquota),
1748         fsparam_flag    ("quota",               Opt_quota),
1749         fsparam_flag    ("noquota",             Opt_noquota),
1750         fsparam_flag    ("usrquota",            Opt_usrquota),
1751         fsparam_flag    ("prjquota",            Opt_prjquota),
1752         fsparam_flag    ("barrier",             Opt_barrier),
1753         fsparam_u32     ("barrier",             Opt_barrier),
1754         fsparam_flag    ("nobarrier",           Opt_nobarrier),
1755         fsparam_flag    ("i_version",           Opt_removed),
1756         fsparam_flag    ("dax",                 Opt_dax),
1757         fsparam_enum    ("dax",                 Opt_dax_type, ext4_param_dax),
1758         fsparam_u32     ("stripe",              Opt_stripe),
1759         fsparam_flag    ("delalloc",            Opt_delalloc),
1760         fsparam_flag    ("nodelalloc",          Opt_nodelalloc),
1761         fsparam_flag    ("warn_on_error",       Opt_warn_on_error),
1762         fsparam_flag    ("nowarn_on_error",     Opt_nowarn_on_error),
1763         fsparam_u32     ("debug_want_extra_isize",
1764                                                 Opt_debug_want_extra_isize),
1765         fsparam_flag    ("mblk_io_submit",      Opt_removed),
1766         fsparam_flag    ("nomblk_io_submit",    Opt_removed),
1767         fsparam_flag    ("block_validity",      Opt_block_validity),
1768         fsparam_flag    ("noblock_validity",    Opt_noblock_validity),
1769         fsparam_u32     ("inode_readahead_blks",
1770                                                 Opt_inode_readahead_blks),
1771         fsparam_u32     ("journal_ioprio",      Opt_journal_ioprio),
1772         fsparam_u32     ("auto_da_alloc",       Opt_auto_da_alloc),
1773         fsparam_flag    ("auto_da_alloc",       Opt_auto_da_alloc),
1774         fsparam_flag    ("noauto_da_alloc",     Opt_noauto_da_alloc),
1775         fsparam_flag    ("dioread_nolock",      Opt_dioread_nolock),
1776         fsparam_flag    ("nodioread_nolock",    Opt_dioread_lock),
1777         fsparam_flag    ("dioread_lock",        Opt_dioread_lock),
1778         fsparam_flag    ("discard",             Opt_discard),
1779         fsparam_flag    ("nodiscard",           Opt_nodiscard),
1780         fsparam_u32     ("init_itable",         Opt_init_itable),
1781         fsparam_flag    ("init_itable",         Opt_init_itable),
1782         fsparam_flag    ("noinit_itable",       Opt_noinit_itable),
1783 #ifdef CONFIG_EXT4_DEBUG
1784         fsparam_flag    ("fc_debug_force",      Opt_fc_debug_force),
1785         fsparam_u32     ("fc_debug_max_replay", Opt_fc_debug_max_replay),
1786 #endif
1787         fsparam_u32     ("max_dir_size_kb",     Opt_max_dir_size_kb),
1788         fsparam_flag    ("test_dummy_encryption",
1789                                                 Opt_test_dummy_encryption),
1790         fsparam_string  ("test_dummy_encryption",
1791                                                 Opt_test_dummy_encryption),
1792         fsparam_flag    ("inlinecrypt",         Opt_inlinecrypt),
1793         fsparam_flag    ("nombcache",           Opt_nombcache),
1794         fsparam_flag    ("no_mbcache",          Opt_nombcache), /* for backward compatibility */
1795         fsparam_flag    ("prefetch_block_bitmaps",
1796                                                 Opt_removed),
1797         fsparam_flag    ("no_prefetch_block_bitmaps",
1798                                                 Opt_no_prefetch_block_bitmaps),
1799         fsparam_s32     ("mb_optimize_scan",    Opt_mb_optimize_scan),
1800         fsparam_string  ("check",               Opt_removed),   /* mount option from ext2/3 */
1801         fsparam_flag    ("nocheck",             Opt_removed),   /* mount option from ext2/3 */
1802         fsparam_flag    ("reservation",         Opt_removed),   /* mount option from ext2/3 */
1803         fsparam_flag    ("noreservation",       Opt_removed),   /* mount option from ext2/3 */
1804         fsparam_u32     ("journal",             Opt_removed),   /* mount option from ext2/3 */
1805         {}
1806 };
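/*
 * Illustrative sketch (not from the original file): with fsparam_flag_no()
 * a single table entry would cover both polarities and report the "no"
 * prefix through result.negated, e.g.:
 *
 *	fsparam_flag_no("barrier", Opt_barrier),
 *
 *	case Opt_barrier:
 *		if (result.negated)
 *			ctx_clear_mount_opt(ctx, EXT4_MOUNT_BARRIER);
 *		else
 *			ctx_set_mount_opt(ctx, EXT4_MOUNT_BARRIER);
 *
 * As the comment above ext4_param_specs explains, ext4 keeps separate
 * negative entries instead so that option setting and display stay
 * unchanged.
 */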
1807
1808
1809 #define MOPT_SET        0x0001
1810 #define MOPT_CLEAR      0x0002
1811 #define MOPT_NOSUPPORT  0x0004
1812 #define MOPT_EXPLICIT   0x0008
1813 #ifdef CONFIG_QUOTA
1814 #define MOPT_Q          0
1815 #define MOPT_QFMT       0x0010
1816 #else
1817 #define MOPT_Q          MOPT_NOSUPPORT
1818 #define MOPT_QFMT       MOPT_NOSUPPORT
1819 #endif
1820 #define MOPT_NO_EXT2    0x0020
1821 #define MOPT_NO_EXT3    0x0040
1822 #define MOPT_EXT4_ONLY  (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1823 #define MOPT_SKIP       0x0080
1824 #define MOPT_2          0x0100
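/*
 * Illustrative reading (not part of the original file): MOPT_SET and
 * MOPT_CLEAR tell the generic handler in ext4_parse_param() whether to
 * set or clear the option's mount_opt bits; MOPT_2 redirects that to
 * s_mount_opt2; MOPT_Q degrades quota options to MOPT_NOSUPPORT when
 * CONFIG_QUOTA is off; and MOPT_NO_EXT2/MOPT_NO_EXT3 make
 * ext4_check_opt_consistency() reject an option on an ext2/ext3 mount,
 * e.g. {Opt_commit, 0, MOPT_NO_EXT2} below marks "commit=" as
 * unavailable under the ext2 personality.
 */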
1825
1826 static const struct mount_opts {
1827         int     token;
1828         int     mount_opt;
1829         int     flags;
1830 } ext4_mount_opts[] = {
1831         {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1832         {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1833         {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1834         {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1835         {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1836         {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1837         {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1838          MOPT_EXT4_ONLY | MOPT_SET},
1839         {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1840          MOPT_EXT4_ONLY | MOPT_CLEAR},
1841         {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1842         {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1843         {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1844          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1845         {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1846          MOPT_EXT4_ONLY | MOPT_CLEAR},
1847         {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1848         {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1849         {Opt_commit, 0, MOPT_NO_EXT2},
1850         {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1851          MOPT_EXT4_ONLY | MOPT_CLEAR},
1852         {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1853          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1854         {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1855                                     EXT4_MOUNT_JOURNAL_CHECKSUM),
1856          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1857         {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1858         {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1859         {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1860         {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1861         {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1862         {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1863         {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1864         {Opt_dax_type, 0, MOPT_EXT4_ONLY},
1865         {Opt_journal_dev, 0, MOPT_NO_EXT2},
1866         {Opt_journal_path, 0, MOPT_NO_EXT2},
1867         {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1868         {Opt_data, 0, MOPT_NO_EXT2},
1869         {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1870 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1871         {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1872 #else
1873         {Opt_acl, 0, MOPT_NOSUPPORT},
1874 #endif
1875         {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1876         {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1877         {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1878         {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1879                                                         MOPT_SET | MOPT_Q},
1880         {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1881                                                         MOPT_SET | MOPT_Q},
1882         {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1883                                                         MOPT_SET | MOPT_Q},
1884         {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1885                        EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1886                                                         MOPT_CLEAR | MOPT_Q},
1887         {Opt_usrjquota, 0, MOPT_Q},
1888         {Opt_grpjquota, 0, MOPT_Q},
1889         {Opt_jqfmt, 0, MOPT_QFMT},
1890         {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1891         {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1892          MOPT_SET},
1893 #ifdef CONFIG_EXT4_DEBUG
1894         {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1895          MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1896 #endif
1897         {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
1898         {Opt_err, 0, 0}
1899 };
1900
1901 #if IS_ENABLED(CONFIG_UNICODE)
1902 static const struct ext4_sb_encodings {
1903         __u16 magic;
1904         char *name;
1905         unsigned int version;
1906 } ext4_sb_encoding_map[] = {
1907         {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1908 };
1909
1910 static const struct ext4_sb_encodings *
1911 ext4_sb_read_encoding(const struct ext4_super_block *es)
1912 {
1913         __u16 magic = le16_to_cpu(es->s_encoding);
1914         int i;
1915
1916         for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1917                 if (magic == ext4_sb_encoding_map[i].magic)
1918                         return &ext4_sb_encoding_map[i];
1919
1920         return NULL;
1921 }
1922 #endif
1923
1924 #define EXT4_SPEC_JQUOTA                        (1 <<  0)
1925 #define EXT4_SPEC_JQFMT                         (1 <<  1)
1926 #define EXT4_SPEC_DATAJ                         (1 <<  2)
1927 #define EXT4_SPEC_SB_BLOCK                      (1 <<  3)
1928 #define EXT4_SPEC_JOURNAL_DEV                   (1 <<  4)
1929 #define EXT4_SPEC_JOURNAL_IOPRIO                (1 <<  5)
1930 #define EXT4_SPEC_s_want_extra_isize            (1 <<  7)
1931 #define EXT4_SPEC_s_max_batch_time              (1 <<  8)
1932 #define EXT4_SPEC_s_min_batch_time              (1 <<  9)
1933 #define EXT4_SPEC_s_inode_readahead_blks        (1 << 10)
1934 #define EXT4_SPEC_s_li_wait_mult                (1 << 11)
1935 #define EXT4_SPEC_s_max_dir_size_kb             (1 << 12)
1936 #define EXT4_SPEC_s_stripe                      (1 << 13)
1937 #define EXT4_SPEC_s_resuid                      (1 << 14)
1938 #define EXT4_SPEC_s_resgid                      (1 << 15)
1939 #define EXT4_SPEC_s_commit_interval             (1 << 16)
1940 #define EXT4_SPEC_s_fc_debug_max_replay         (1 << 17)
1941 #define EXT4_SPEC_s_sb_block                    (1 << 18)
1942 #define EXT4_SPEC_mb_optimize_scan              (1 << 19)
1943
1944 struct ext4_fs_context {
1945         char            *s_qf_names[EXT4_MAXQUOTAS];
1946         struct fscrypt_dummy_policy dummy_enc_policy;
1947         int             s_jquota_fmt;   /* Format of quota to use */
1948 #ifdef CONFIG_EXT4_DEBUG
1949         int s_fc_debug_max_replay;
1950 #endif
1951         unsigned short  qname_spec;
1952         unsigned long   vals_s_flags;   /* Bits to set in s_flags */
1953         unsigned long   mask_s_flags;   /* Bits changed in s_flags */
1954         unsigned long   journal_devnum;
1955         unsigned long   s_commit_interval;
1956         unsigned long   s_stripe;
1957         unsigned int    s_inode_readahead_blks;
1958         unsigned int    s_want_extra_isize;
1959         unsigned int    s_li_wait_mult;
1960         unsigned int    s_max_dir_size_kb;
1961         unsigned int    journal_ioprio;
1962         unsigned int    vals_s_mount_opt;
1963         unsigned int    mask_s_mount_opt;
1964         unsigned int    vals_s_mount_opt2;
1965         unsigned int    mask_s_mount_opt2;
1966         unsigned int    opt_flags;      /* MOPT flags */
1967         unsigned int    spec;
1968         u32             s_max_batch_time;
1969         u32             s_min_batch_time;
1970         kuid_t          s_resuid;
1971         kgid_t          s_resgid;
1972         ext4_fsblk_t    s_sb_block;
1973 };
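/*
 * Example (illustrative): each EXT4_SPEC_* bit records that an option was
 * specified at all, independently of its value.  Parsing "stripe=0" does
 *
 *	ctx->s_stripe = 0;
 *	ctx->spec |= EXT4_SPEC_s_stripe;
 *
 * so later code can tell an explicit "stripe=0" apart from the option
 * never having been given.
 */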
1974
1975 static void ext4_fc_free(struct fs_context *fc)
1976 {
1977         struct ext4_fs_context *ctx = fc->fs_private;
1978         int i;
1979
1980         if (!ctx)
1981                 return;
1982
1983         for (i = 0; i < EXT4_MAXQUOTAS; i++)
1984                 kfree(ctx->s_qf_names[i]);
1985
1986         fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
1987         kfree(ctx);
1988 }
1989
1990 int ext4_init_fs_context(struct fs_context *fc)
1991 {
1992         struct ext4_fs_context *ctx;
1993
1994         ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
1995         if (!ctx)
1996                 return -ENOMEM;
1997
1998         fc->fs_private = ctx;
1999         fc->ops = &ext4_context_ops;
2000
2001         return 0;
2002 }
2003
2004 #ifdef CONFIG_QUOTA
2005 /*
2006  * Note the name of the specified quota file.
2007  */
2008 static int note_qf_name(struct fs_context *fc, int qtype,
2009                        struct fs_parameter *param)
2010 {
2011         struct ext4_fs_context *ctx = fc->fs_private;
2012         char *qname;
2013
2014         if (param->size < 1) {
2015                 ext4_msg(NULL, KERN_ERR, "Missing quota name");
2016                 return -EINVAL;
2017         }
2018         if (strchr(param->string, '/')) {
2019                 ext4_msg(NULL, KERN_ERR,
2020                          "quotafile must be on filesystem root");
2021                 return -EINVAL;
2022         }
2023         if (ctx->s_qf_names[qtype]) {
2024                 if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2025                         ext4_msg(NULL, KERN_ERR,
2026                                  "%s quota file already specified",
2027                                  QTYPE2NAME(qtype));
2028                         return -EINVAL;
2029                 }
2030                 return 0;
2031         }
2032
2033         qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2034         if (!qname) {
2035                 ext4_msg(NULL, KERN_ERR,
2036                          "Not enough memory for storing quotafile name");
2037                 return -ENOMEM;
2038         }
2039         ctx->s_qf_names[qtype] = qname;
2040         ctx->qname_spec |= 1 << qtype;
2041         ctx->spec |= EXT4_SPEC_JQUOTA;
2042         return 0;
2043 }
2044
2045 /*
2046  * Clear the name of the specified quota file.
2047  */
2048 static int unnote_qf_name(struct fs_context *fc, int qtype)
2049 {
2050         struct ext4_fs_context *ctx = fc->fs_private;
2051
2052         kfree(ctx->s_qf_names[qtype]);
2053
2054         ctx->s_qf_names[qtype] = NULL;
2055         ctx->qname_spec |= 1 << qtype;
2056         ctx->spec |= EXT4_SPEC_JQUOTA;
2057         return 0;
2058 }
2059 #endif
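/*
 * Usage note (illustrative, assumed typical invocations):
 *
 *	mount -o usrjquota=aquota.user,jqfmt=vfsv1 /dev/sdb1 /mnt
 *	mount -o remount,usrjquota= /mnt
 *
 * The first form stores the quota file name via note_qf_name(); the
 * second passes an empty string, which the Opt_usrjquota handling in
 * ext4_parse_param() routes to unnote_qf_name() to clear it again.
 */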
2060
2061 static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2062                                             struct ext4_fs_context *ctx)
2063 {
2064         int err;
2065
2066         if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2067                 ext4_msg(NULL, KERN_WARNING,
2068                          "test_dummy_encryption option not supported");
2069                 return -EINVAL;
2070         }
2071         err = fscrypt_parse_test_dummy_encryption(param,
2072                                                   &ctx->dummy_enc_policy);
2073         if (err == -EINVAL) {
2074                 ext4_msg(NULL, KERN_WARNING,
2075                          "Value of option \"%s\" is unrecognized", param->key);
2076         } else if (err == -EEXIST) {
2077                 ext4_msg(NULL, KERN_WARNING,
2078                          "Conflicting test_dummy_encryption options");
2079                 return -EINVAL;
2080         }
2081         return err;
2082 }
2083
2084 #define EXT4_SET_CTX(name)                                              \
2085 static inline __maybe_unused                                            \
2086 void ctx_set_##name(struct ext4_fs_context *ctx, unsigned long flag)    \
2087 {                                                                       \
2088         ctx->mask_s_##name |= flag;                                     \
2089         ctx->vals_s_##name |= flag;                                     \
2090 }
2091
2092 #define EXT4_CLEAR_CTX(name)                                            \
2093 static inline __maybe_unused                                            \
2094 void ctx_clear_##name(struct ext4_fs_context *ctx, unsigned long flag)  \
2095 {                                                                       \
2096         ctx->mask_s_##name |= flag;                                     \
2097         ctx->vals_s_##name &= ~flag;                                    \
2098 }
2099
2100 #define EXT4_TEST_CTX(name)                                             \
2101 static inline unsigned long                                             \
2102 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)        \
2103 {                                                                       \
2104         return (ctx->vals_s_##name & flag);                             \
2105 }
2106
2107 EXT4_SET_CTX(flags); /* set only */
2108 EXT4_SET_CTX(mount_opt);
2109 EXT4_CLEAR_CTX(mount_opt);
2110 EXT4_TEST_CTX(mount_opt);
2111 EXT4_SET_CTX(mount_opt2);
2112 EXT4_CLEAR_CTX(mount_opt2);
2113 EXT4_TEST_CTX(mount_opt2);
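/*
 * For reference (expansion sketch, not in the original file),
 * EXT4_SET_CTX(mount_opt) above generates roughly:
 *
 *	static inline __maybe_unused
 *	void ctx_set_mount_opt(struct ext4_fs_context *ctx, unsigned long flag)
 *	{
 *		ctx->mask_s_mount_opt |= flag;
 *		ctx->vals_s_mount_opt |= flag;
 *	}
 *
 * The mask_* field remembers which bits an option touched and the vals_*
 * field the requested value, so only explicitly set or cleared bits are
 * later applied to the superblock.
 */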
2114
2115 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2116 {
2117         struct ext4_fs_context *ctx = fc->fs_private;
2118         struct fs_parse_result result;
2119         const struct mount_opts *m;
2120         int is_remount;
2121         int token;
2122
2123         token = fs_parse(fc, ext4_param_specs, param, &result);
2124         if (token < 0)
2125                 return token;
2126         is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2127
2128         for (m = ext4_mount_opts; m->token != Opt_err; m++)
2129                 if (token == m->token)
2130                         break;
2131
2132         ctx->opt_flags |= m->flags;
2133
2134         if (m->flags & MOPT_EXPLICIT) {
2135                 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2136                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2137                 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2138                         ctx_set_mount_opt2(ctx,
2139                                        EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2140                 } else
2141                         return -EINVAL;
2142         }
2143
2144         if (m->flags & MOPT_NOSUPPORT) {
2145                 ext4_msg(NULL, KERN_ERR, "%s option not supported",
2146                          param->key);
2147                 return 0;
2148         }
2149
2150         switch (token) {
2151 #ifdef CONFIG_QUOTA
2152         case Opt_usrjquota:
2153                 if (!*param->string)
2154                         return unnote_qf_name(fc, USRQUOTA);
2155                 else
2156                         return note_qf_name(fc, USRQUOTA, param);
2157         case Opt_grpjquota:
2158                 if (!*param->string)
2159                         return unnote_qf_name(fc, GRPQUOTA);
2160                 else
2161                         return note_qf_name(fc, GRPQUOTA, param);
2162 #endif
2163         case Opt_sb:
2164                 if (is_remount) {
2165                         ext4_msg(NULL, KERN_WARNING,
2166                                  "Ignoring %s option on remount", param->key);
2167                 } else {
2168                         ctx->s_sb_block = result.uint_32;
2169                         ctx->spec |= EXT4_SPEC_s_sb_block;
2170                 }
2171                 return 0;
2172         case Opt_removed:
2173                 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2174                          param->key);
2175                 return 0;
2176         case Opt_inlinecrypt:
2177 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2178                 ctx_set_flags(ctx, SB_INLINECRYPT);
2179 #else
2180                 ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2181 #endif
2182                 return 0;
2183         case Opt_errors:
2184                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2185                 ctx_set_mount_opt(ctx, result.uint_32);
2186                 return 0;
2187 #ifdef CONFIG_QUOTA
2188         case Opt_jqfmt:
2189                 ctx->s_jquota_fmt = result.uint_32;
2190                 ctx->spec |= EXT4_SPEC_JQFMT;
2191                 return 0;
2192 #endif
2193         case Opt_data:
2194                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2195                 ctx_set_mount_opt(ctx, result.uint_32);
2196                 ctx->spec |= EXT4_SPEC_DATAJ;
2197                 return 0;
2198         case Opt_commit:
2199                 if (result.uint_32 == 0)
2200                         result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2201                 else if (result.uint_32 > INT_MAX / HZ) {
2202                         ext4_msg(NULL, KERN_ERR,
2203                                  "Invalid commit interval %d, "
2204                                  "must not be larger than %d",
2205                                  result.uint_32, INT_MAX / HZ);
2206                         return -EINVAL;
2207                 }
2208                 ctx->s_commit_interval = HZ * result.uint_32;
2209                 ctx->spec |= EXT4_SPEC_s_commit_interval;
2210                 return 0;
2211         case Opt_debug_want_extra_isize:
2212                 if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2213                         ext4_msg(NULL, KERN_ERR,
2214                                  "Invalid want_extra_isize %d", result.uint_32);
2215                         return -EINVAL;
2216                 }
2217                 ctx->s_want_extra_isize = result.uint_32;
2218                 ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2219                 return 0;
2220         case Opt_max_batch_time:
2221                 ctx->s_max_batch_time = result.uint_32;
2222                 ctx->spec |= EXT4_SPEC_s_max_batch_time;
2223                 return 0;
2224         case Opt_min_batch_time:
2225                 ctx->s_min_batch_time = result.uint_32;
2226                 ctx->spec |= EXT4_SPEC_s_min_batch_time;
2227                 return 0;
2228         case Opt_inode_readahead_blks:
2229                 if (result.uint_32 &&
2230                     (result.uint_32 > (1 << 30) ||
2231                      !is_power_of_2(result.uint_32))) {
2232                         ext4_msg(NULL, KERN_ERR,
2233                                  "EXT4-fs: inode_readahead_blks must be "
2234                                  "0 or a power of 2 smaller than 2^31");
2235                         return -EINVAL;
2236                 }
2237                 ctx->s_inode_readahead_blks = result.uint_32;
2238                 ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2239                 return 0;
2240         case Opt_init_itable:
2241                 ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2242                 ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2243                 if (param->type == fs_value_is_string)
2244                         ctx->s_li_wait_mult = result.uint_32;
2245                 ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2246                 return 0;
2247         case Opt_max_dir_size_kb:
2248                 ctx->s_max_dir_size_kb = result.uint_32;
2249                 ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2250                 return 0;
2251 #ifdef CONFIG_EXT4_DEBUG
2252         case Opt_fc_debug_max_replay:
2253                 ctx->s_fc_debug_max_replay = result.uint_32;
2254                 ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2255                 return 0;
2256 #endif
2257         case Opt_stripe:
2258                 ctx->s_stripe = result.uint_32;
2259                 ctx->spec |= EXT4_SPEC_s_stripe;
2260                 return 0;
2261         case Opt_resuid:
2262                 ctx->s_resuid = result.uid;
2263                 ctx->spec |= EXT4_SPEC_s_resuid;
2264                 return 0;
2265         case Opt_resgid:
2266                 ctx->s_resgid = result.gid;
2267                 ctx->spec |= EXT4_SPEC_s_resgid;
2268                 return 0;
2269         case Opt_journal_dev:
2270                 if (is_remount) {
2271                         ext4_msg(NULL, KERN_ERR,
2272                                  "Cannot specify journal on remount");
2273                         return -EINVAL;
2274                 }
2275                 ctx->journal_devnum = result.uint_32;
2276                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2277                 return 0;
2278         case Opt_journal_path:
2279         {
2280                 struct inode *journal_inode;
2281                 struct path path;
2282                 int error;
2283
2284                 if (is_remount) {
2285                         ext4_msg(NULL, KERN_ERR,
2286                                  "Cannot specify journal on remount");
2287                         return -EINVAL;
2288                 }
2289
2290                 error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2291                 if (error) {
2292                         ext4_msg(NULL, KERN_ERR, "error: could not find "
2293                                  "journal device path");
2294                         return -EINVAL;
2295                 }
2296
2297                 journal_inode = d_inode(path.dentry);
2298                 ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2299                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2300                 path_put(&path);
2301                 return 0;
2302         }
2303         case Opt_journal_ioprio:
2304                 if (result.uint_32 > 7) {
2305                         ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2306                                  " (must be 0-7)");
2307                         return -EINVAL;
2308                 }
2309                 ctx->journal_ioprio =
2310                         IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2311                 ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2312                 return 0;
2313         case Opt_test_dummy_encryption:
2314                 return ext4_parse_test_dummy_encryption(param, ctx);
2315         case Opt_dax:
2316         case Opt_dax_type:
2317 #ifdef CONFIG_FS_DAX
2318         {
2319                 int type = (token == Opt_dax) ?
2320                            Opt_dax : result.uint_32;
2321
2322                 switch (type) {
2323                 case Opt_dax:
2324                 case Opt_dax_always:
2325                         ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2326                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2327                         break;
2328                 case Opt_dax_never:
2329                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2330                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2331                         break;
2332                 case Opt_dax_inode:
2333                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2334                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2335                         /* Strictly for printing options */
2336                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2337                         break;
2338                 }
2339                 return 0;
2340         }
2341 #else
2342                 ext4_msg(NULL, KERN_INFO, "dax option not supported");
2343                 return -EINVAL;
2344 #endif
2345         case Opt_data_err:
2346                 if (result.uint_32 == Opt_data_err_abort)
2347                         ctx_set_mount_opt(ctx, m->mount_opt);
2348                 else if (result.uint_32 == Opt_data_err_ignore)
2349                         ctx_clear_mount_opt(ctx, m->mount_opt);
2350                 return 0;
2351         case Opt_mb_optimize_scan:
2352                 if (result.int_32 == 1) {
2353                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2354                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2355                 } else if (result.int_32 == 0) {
2356                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2357                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2358                 } else {
2359                         ext4_msg(NULL, KERN_WARNING,
2360                                  "mb_optimize_scan should be set to 0 or 1.");
2361                         return -EINVAL;
2362                 }
2363                 return 0;
2364         }
2365
2366         /*
2367          * At this point we should only be getting options requiring MOPT_SET
2368          * or MOPT_CLEAR.  Anything else is a bug.
2369          */
2370         if (m->token == Opt_err) {
2371                 ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2372                          param->key);
2373                 WARN_ON(1);
2374                 return -EINVAL;
2375         } else {
2378                 unsigned int set = 0;
2379
2380                 if ((param->type == fs_value_is_flag) ||
2381                     result.uint_32 > 0)
2382                         set = 1;
2383
2384                 if (m->flags & MOPT_CLEAR)
2385                         set = !set;
2386                 else if (unlikely(!(m->flags & MOPT_SET))) {
2387                         ext4_msg(NULL, KERN_WARNING,
2388                                  "buggy handling of option %s",
2389                                  param->key);
2390                         WARN_ON(1);
2391                         return -EINVAL;
2392                 }
2393                 if (m->flags & MOPT_2) {
2394                         if (set != 0)
2395                                 ctx_set_mount_opt2(ctx, m->mount_opt);
2396                         else
2397                                 ctx_clear_mount_opt2(ctx, m->mount_opt);
2398                 } else {
2399                         if (set != 0)
2400                                 ctx_set_mount_opt(ctx, m->mount_opt);
2401                         else
2402                                 ctx_clear_mount_opt(ctx, m->mount_opt);
2403                 }
2404         }
2405
2406         return 0;
2407 }
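/*
 * Example (illustrative): "discard" matches none of the special cases in
 * the switch above, so it reaches the generic handler.  Its mount_opts
 * entry is {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}; the parameter
 * arrives as fs_value_is_flag, so set stays 1 and
 * ctx_set_mount_opt(ctx, EXT4_MOUNT_DISCARD) runs.  "nodiscard" carries
 * MOPT_CLEAR, which flips set to 0 and clears the bit instead.
 */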
2408
2409 static int parse_options(struct fs_context *fc, char *options)
2410 {
2411         struct fs_parameter param;
2412         int ret;
2413         char *key;
2414
2415         if (!options)
2416                 return 0;
2417
2418         while ((key = strsep(&options, ",")) != NULL) {
2419                 if (*key) {
2420                         size_t v_len = 0;
2421                         char *value = strchr(key, '=');
2422
2423                         param.type = fs_value_is_flag;
2424                         param.string = NULL;
2425
2426                         if (value) {
2427                                 if (value == key)
2428                                         continue;
2429
2430                                 *value++ = 0;
2431                                 v_len = strlen(value);
2432                                 param.string = kmemdup_nul(value, v_len,
2433                                                            GFP_KERNEL);
2434                                 if (!param.string)
2435                                         return -ENOMEM;
2436                                 param.type = fs_value_is_string;
2437                         }
2438
2439                         param.key = key;
2440                         param.size = v_len;
2441
2442                         ret = ext4_parse_param(fc, &param);
2443                         kfree(param.string);
2444                         if (ret < 0)
2445                                 return ret;
2446                 }
2447         }
2448
2449         ret = ext4_validate_options(fc);
2450         if (ret < 0)
2451                 return ret;
2452
2453         return 0;
2454 }
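/*
 * Example (illustrative): parse_options() splits "noload,commit=30" into
 * two ext4_parse_param() calls:
 *
 *	{ .key = "noload", .type = fs_value_is_flag }
 *	{ .key = "commit", .string = "30", .size = 2,
 *	  .type = fs_value_is_string }
 */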
2455
2456 static int parse_apply_sb_mount_options(struct super_block *sb,
2457                                         struct ext4_fs_context *m_ctx)
2458 {
2459         struct ext4_sb_info *sbi = EXT4_SB(sb);
2460         char *s_mount_opts = NULL;
2461         struct ext4_fs_context *s_ctx = NULL;
2462         struct fs_context *fc = NULL;
2463         int ret = -ENOMEM;
2464
2465         if (!sbi->s_es->s_mount_opts[0])
2466                 return 0;
2467
2468         s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2469                                 sizeof(sbi->s_es->s_mount_opts),
2470                                 GFP_KERNEL);
2471         if (!s_mount_opts)
2472                 return ret;
2473
2474         fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2475         if (!fc)
2476                 goto out_free;
2477
2478         s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2479         if (!s_ctx)
2480                 goto out_free;
2481
2482         fc->fs_private = s_ctx;
2483         fc->s_fs_info = sbi;
2484
2485         ret = parse_options(fc, s_mount_opts);
2486         if (ret < 0)
2487                 goto parse_failed;
2488
2489         ret = ext4_check_opt_consistency(fc, sb);
2490         if (ret < 0) {
2491 parse_failed:
2492                 ext4_msg(sb, KERN_WARNING,
2493                          "failed to parse options in superblock: %s",
2494                          s_mount_opts);
2495                 ret = 0;
2496                 goto out_free;
2497         }
2498
2499         if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2500                 m_ctx->journal_devnum = s_ctx->journal_devnum;
2501         if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2502                 m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2503
2504         ext4_apply_options(fc, sb);
2505         ret = 0;
2506
2507 out_free:
2508         if (fc) {
2509                 ext4_fc_free(fc);
2510                 kfree(fc);
2511         }
2512         kfree(s_mount_opts);
2513         return ret;
2514 }
2515
2516 static void ext4_apply_quota_options(struct fs_context *fc,
2517                                      struct super_block *sb)
2518 {
2519 #ifdef CONFIG_QUOTA
2520         bool quota_feature = ext4_has_feature_quota(sb);
2521         struct ext4_fs_context *ctx = fc->fs_private;
2522         struct ext4_sb_info *sbi = EXT4_SB(sb);
2523         char *qname;
2524         int i;
2525
2526         if (quota_feature)
2527                 return;
2528
2529         if (ctx->spec & EXT4_SPEC_JQUOTA) {
2530                 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2531                         if (!(ctx->qname_spec & (1 << i)))
2532                                 continue;
2533
2534                         qname = ctx->s_qf_names[i]; /* May be NULL */
2535                         if (qname)
2536                                 set_opt(sb, QUOTA);
2537                         ctx->s_qf_names[i] = NULL;
2538                         qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
2539                                                 lockdep_is_held(&sb->s_umount));
2540                         if (qname)
2541                                 kfree_rcu_mightsleep(qname);
2542                 }
2543         }
2544
2545         if (ctx->spec & EXT4_SPEC_JQFMT)
2546                 sbi->s_jquota_fmt = ctx->s_jquota_fmt;
2547 #endif
2548 }
2549
2550 /*
2551  * Check quota settings consistency.
2552  */
2553 static int ext4_check_quota_consistency(struct fs_context *fc,
2554                                         struct super_block *sb)
2555 {
2556 #ifdef CONFIG_QUOTA
2557         struct ext4_fs_context *ctx = fc->fs_private;
2558         struct ext4_sb_info *sbi = EXT4_SB(sb);
2559         bool quota_feature = ext4_has_feature_quota(sb);
2560         bool quota_loaded = sb_any_quota_loaded(sb);
2561         bool usr_qf_name, grp_qf_name, usrquota, grpquota;
2562         int quota_flags, i;
2563
2564         /*
2565          * We do the test below only for project quotas. 'usrquota' and
2566          * 'grpquota' mount options are allowed even without quota feature
2567          * to support legacy quotas in quota files.
2568          */
2569         if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
2570             !ext4_has_feature_project(sb)) {
2571                 ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
2572                          "Cannot enable project quota enforcement.");
2573                 return -EINVAL;
2574         }
2575
2576         quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
2577                       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
2578         if (quota_loaded &&
2579             ctx->mask_s_mount_opt & quota_flags &&
2580             !ctx_test_mount_opt(ctx, quota_flags))
2581                 goto err_quota_change;
2582
2583         if (ctx->spec & EXT4_SPEC_JQUOTA) {
2584
2585                 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2586                         if (!(ctx->qname_spec & (1 << i)))
2587                                 continue;
2588
2589                         if (quota_loaded &&
2590                             !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
2591                                 goto err_jquota_change;
2592
2593                         if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
2594                             strcmp(get_qf_name(sb, sbi, i),
2595                                    ctx->s_qf_names[i]) != 0)
2596                                 goto err_jquota_specified;
2597                 }
2598
2599                 if (quota_feature) {
2600                         ext4_msg(NULL, KERN_INFO,
2601                                  "Journaled quota options ignored when "
2602                                  "QUOTA feature is enabled");
2603                         return 0;
2604                 }
2605         }
2606
2607         if (ctx->spec & EXT4_SPEC_JQFMT) {
2608                 if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
2609                         goto err_jquota_change;
2610                 if (quota_feature) {
2611                         ext4_msg(NULL, KERN_INFO, "Quota format mount options "
2612                                  "ignored when QUOTA feature is enabled");
2613                         return 0;
2614                 }
2615         }
2616
2617         /* Make sure we don't mix old and new quota format */
2618         usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
2619                        ctx->s_qf_names[USRQUOTA]);
2620         grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
2621                        ctx->s_qf_names[GRPQUOTA]);
2622
2623         usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2624                     test_opt(sb, USRQUOTA));
2625
2626         grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
2627                     test_opt(sb, GRPQUOTA));
2628
2629         if (usr_qf_name) {
2630                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2631                 usrquota = false;
2632         }
2633         if (grp_qf_name) {
2634                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2635                 grpquota = false;
2636         }
2637
2638         if (usr_qf_name || grp_qf_name) {
2639                 if (usrquota || grpquota) {
2640                         ext4_msg(NULL, KERN_ERR, "old and new quota "
2641                                  "format mixing");
2642                         return -EINVAL;
2643                 }
2644
2645                 if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
2646                         ext4_msg(NULL, KERN_ERR, "journaled quota format "
2647                                  "not specified");
2648                         return -EINVAL;
2649                 }
2650         }
2651
2652         return 0;
2653
2654 err_quota_change:
2655         ext4_msg(NULL, KERN_ERR,
2656                  "Cannot change quota options when quota turned on");
2657         return -EINVAL;
2658 err_jquota_change:
2659         ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
2660                  "options when quota turned on");
2661         return -EINVAL;
2662 err_jquota_specified:
2663         ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
2664                  QTYPE2NAME(i));
2665         return -EINVAL;
2666 #else
2667         return 0;
2668 #endif
2669 }
2670
2671 static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2672                                             struct super_block *sb)
2673 {
2674         const struct ext4_fs_context *ctx = fc->fs_private;
2675         const struct ext4_sb_info *sbi = EXT4_SB(sb);
2676
2677         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2678                 return 0;
2679
2680         if (!ext4_has_feature_encrypt(sb)) {
2681                 ext4_msg(NULL, KERN_WARNING,
2682                          "test_dummy_encryption requires encrypt feature");
2683                 return -EINVAL;
2684         }
2685         /*
2686          * This mount option is just for testing, and it's not worthwhile to
2687          * implement the extra complexity (e.g. RCU protection) that would be
2688          * needed to allow it to be set or changed during remount.  We do allow
2689          * it to be specified during remount, but only if there is no change.
2690          */
2691         if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2692                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2693                                                  &ctx->dummy_enc_policy))
2694                         return 0;
2695                 ext4_msg(NULL, KERN_WARNING,
2696                          "Can't set or change test_dummy_encryption on remount");
2697                 return -EINVAL;
2698         }
2699         /* Also make sure s_mount_opts didn't contain a conflicting value. */
2700         if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2701                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2702                                                  &ctx->dummy_enc_policy))
2703                         return 0;
2704                 ext4_msg(NULL, KERN_WARNING,
2705                          "Conflicting test_dummy_encryption options");
2706                 return -EINVAL;
2707         }
2708         return 0;
2709 }
2710
2711 static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2712                                              struct super_block *sb)
2713 {
2714         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2715             /* if already set, it was already verified to be the same */
2716             fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2717                 return;
2718         EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2719         memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2720         ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2721 }
2722
2723 static int ext4_check_opt_consistency(struct fs_context *fc,
2724                                       struct super_block *sb)
2725 {
2726         struct ext4_fs_context *ctx = fc->fs_private;
2727         struct ext4_sb_info *sbi = fc->s_fs_info;
2728         int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2729         int err;
2730
2731         if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2732                 ext4_msg(NULL, KERN_ERR,
2733                          "Mount option(s) incompatible with ext2");
2734                 return -EINVAL;
2735         }
2736         if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2737                 ext4_msg(NULL, KERN_ERR,
2738                          "Mount option(s) incompatible with ext3");
2739                 return -EINVAL;
2740         }
2741
2742         if (ctx->s_want_extra_isize >
2743             (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2744                 ext4_msg(NULL, KERN_ERR,
2745                          "Invalid want_extra_isize %d",
2746                          ctx->s_want_extra_isize);
2747                 return -EINVAL;
2748         }
2749
2750         err = ext4_check_test_dummy_encryption(fc, sb);
2751         if (err)
2752                 return err;
2753
2754         if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2755                 if (!sbi->s_journal) {
2756                         ext4_msg(NULL, KERN_WARNING,
2757                                  "Remounting file system with no journal "
2758                                  "so ignoring journalled data option");
2759                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2760                 } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2761                            test_opt(sb, DATA_FLAGS)) {
2762                         ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2763                                  "on remount");
2764                         return -EINVAL;
2765                 }
2766         }
2767
2768         if (is_remount) {
2769                 if (!sbi->s_journal &&
2770                     ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
2771                         ext4_msg(NULL, KERN_WARNING,
2772                                  "Remounting fs w/o journal so ignoring data_err option");
2773                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
2774                 }
2775
2776                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2777                     (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2778                         ext4_msg(NULL, KERN_ERR, "can't mount with "
2779                                  "both data=journal and dax");
2780                         return -EINVAL;
2781                 }
2782
2783                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2784                     (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2785                      (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2786 fail_dax_change_remount:
2787                         ext4_msg(NULL, KERN_ERR, "can't change "
2788                                  "dax mount option while remounting");
2789                         return -EINVAL;
2790                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2791                          (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2792                           (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2793                         goto fail_dax_change_remount;
2794                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2795                            ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2796                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2797                             !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2798                         goto fail_dax_change_remount;
2799                 }
2800         }
2801
2802         return ext4_check_quota_consistency(fc, sb);
2803 }
2804
2805 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2806 {
2807         struct ext4_fs_context *ctx = fc->fs_private;
2808         struct ext4_sb_info *sbi = fc->s_fs_info;
2809
2810         sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2811         sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2812         sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2813         sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2814         sb->s_flags &= ~ctx->mask_s_flags;
2815         sb->s_flags |= ctx->vals_s_flags;
2816
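/*
 * APPLY() copies a parsed value from the parse context into the sb
 * info, but only if the matching EXT4_SPEC_* bit shows that the option
 * was explicitly specified.
 */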
2817 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2818         APPLY(s_commit_interval);
2819         APPLY(s_stripe);
2820         APPLY(s_max_batch_time);
2821         APPLY(s_min_batch_time);
2822         APPLY(s_want_extra_isize);
2823         APPLY(s_inode_readahead_blks);
2824         APPLY(s_max_dir_size_kb);
2825         APPLY(s_li_wait_mult);
2826         APPLY(s_resgid);
2827         APPLY(s_resuid);
2828
2829 #ifdef CONFIG_EXT4_DEBUG
2830         APPLY(s_fc_debug_max_replay);
2831 #endif
2832
2833         ext4_apply_quota_options(fc, sb);
2834         ext4_apply_test_dummy_encryption(ctx, sb);
2835 }
2836
2838 static int ext4_validate_options(struct fs_context *fc)
2839 {
2840 #ifdef CONFIG_QUOTA
2841         struct ext4_fs_context *ctx = fc->fs_private;
2842         char *usr_qf_name, *grp_qf_name;
2843
2844         usr_qf_name = ctx->s_qf_names[USRQUOTA];
2845         grp_qf_name = ctx->s_qf_names[GRPQUOTA];
2846
2847         if (usr_qf_name || grp_qf_name) {
2848                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
2849                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2850
2851                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
2852                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2853
2854                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2855                     ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
2856                         ext4_msg(NULL, KERN_ERR, "old and new quota "
2857                                  "format mixing");
2858                         return -EINVAL;
2859                 }
2860         }
2861 #endif
2862         return 1;
2863 }
2864
2865 static inline void ext4_show_quota_options(struct seq_file *seq,
2866                                            struct super_block *sb)
2867 {
2868 #if defined(CONFIG_QUOTA)
2869         struct ext4_sb_info *sbi = EXT4_SB(sb);
2870         char *usr_qf_name, *grp_qf_name;
2871
2872         if (sbi->s_jquota_fmt) {
2873                 char *fmtname = "";
2874
2875                 switch (sbi->s_jquota_fmt) {
2876                 case QFMT_VFS_OLD:
2877                         fmtname = "vfsold";
2878                         break;
2879                 case QFMT_VFS_V0:
2880                         fmtname = "vfsv0";
2881                         break;
2882                 case QFMT_VFS_V1:
2883                         fmtname = "vfsv1";
2884                         break;
2885                 }
2886                 seq_printf(seq, ",jqfmt=%s", fmtname);
2887         }
2888
2889         rcu_read_lock();
2890         usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
2891         grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
2892         if (usr_qf_name)
2893                 seq_show_option(seq, "usrjquota", usr_qf_name);
2894         if (grp_qf_name)
2895                 seq_show_option(seq, "grpjquota", grp_qf_name);
2896         rcu_read_unlock();
2897 #endif
2898 }
2899
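/*
 * Map a mount option token back to its canonical name. Flag-style
 * parameters (fsparam_flag) are registered without a type handler, so
 * matching on !spec->type picks the plain flag spelling for tokens that
 * also have argument-taking variants.
 */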
2900 static const char *token2str(int token)
2901 {
2902         const struct fs_parameter_spec *spec;
2903
2904         for (spec = ext4_param_specs; spec->name != NULL; spec++)
2905                 if (spec->opt == token && !spec->type)
2906                         break;
2907         return spec->name;
2908 }
2909
2910 /*
2911  * Show an option if
2912  *  - it's set to a non-default value OR
2913  *  - the per-sb default is different from the global default
2914  */
2915 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2916                               int nodefs)
2917 {
2918         struct ext4_sb_info *sbi = EXT4_SB(sb);
2919         struct ext4_super_block *es = sbi->s_es;
2920         int def_errors;
2921         const struct mount_opts *m;
2922         char sep = nodefs ? '\n' : ',';
2923
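/*
 * In nodefs mode (the per-filesystem "options" seq file) every option
 * is printed on its own line; normal show_options output is comma
 * separated.
 */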
2924 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2925 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2926
2927         if (sbi->s_sb_block != 1)
2928                 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2929
2930         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2931                 int want_set = m->flags & MOPT_SET;
2932                 int opt_2 = m->flags & MOPT_2;
2933                 unsigned int mount_opt, def_mount_opt;
2934
2935                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2936                     m->flags & MOPT_SKIP)
2937                         continue;
2938
2939                 if (opt_2) {
2940                         mount_opt = sbi->s_mount_opt2;
2941                         def_mount_opt = sbi->s_def_mount_opt2;
2942                 } else {
2943                         mount_opt = sbi->s_mount_opt;
2944                         def_mount_opt = sbi->s_def_mount_opt;
2945                 }
2946                 /* skip if same as the default */
2947                 if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2948                         continue;
2949                 /* select Opt_noFoo vs Opt_Foo */
2950                 if ((want_set &&
2951                      (mount_opt & m->mount_opt) != m->mount_opt) ||
2952                     (!want_set && (mount_opt & m->mount_opt)))
2953                         continue;
2954                 SEQ_OPTS_PRINT("%s", token2str(m->token));
2955         }
2956
2957         if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2958             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2959                 SEQ_OPTS_PRINT("resuid=%u",
2960                                 from_kuid_munged(&init_user_ns, sbi->s_resuid));
2961         if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2962             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2963                 SEQ_OPTS_PRINT("resgid=%u",
2964                                 from_kgid_munged(&init_user_ns, sbi->s_resgid));
2965         def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2966         if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2967                 SEQ_OPTS_PUTS("errors=remount-ro");
2968         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2969                 SEQ_OPTS_PUTS("errors=continue");
2970         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2971                 SEQ_OPTS_PUTS("errors=panic");
2972         if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2973                 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2974         if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2975                 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2976         if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2977                 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2978         if (nodefs || sbi->s_stripe)
2979                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2980         if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2981                         (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
2982                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2983                         SEQ_OPTS_PUTS("data=journal");
2984                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2985                         SEQ_OPTS_PUTS("data=ordered");
2986                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2987                         SEQ_OPTS_PUTS("data=writeback");
2988         }
2989         if (nodefs ||
2990             sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2991                 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2992                                sbi->s_inode_readahead_blks);
2993
2994         if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
2995                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2996                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2997         if (nodefs || sbi->s_max_dir_size_kb)
2998                 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2999         if (test_opt(sb, DATA_ERR_ABORT))
3000                 SEQ_OPTS_PUTS("data_err=abort");
3001
3002         fscrypt_show_test_dummy_encryption(seq, sep, sb);
3003
3004         if (sb->s_flags & SB_INLINECRYPT)
3005                 SEQ_OPTS_PUTS("inlinecrypt");
3006
3007         if (test_opt(sb, DAX_ALWAYS)) {
3008                 if (IS_EXT2_SB(sb))
3009                         SEQ_OPTS_PUTS("dax");
3010                 else
3011                         SEQ_OPTS_PUTS("dax=always");
3012         } else if (test_opt2(sb, DAX_NEVER)) {
3013                 SEQ_OPTS_PUTS("dax=never");
3014         } else if (test_opt2(sb, DAX_INODE)) {
3015                 SEQ_OPTS_PUTS("dax=inode");
3016         }
3017
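        /*
         * mb_optimize_scan defaults to on once a filesystem has at
         * least MB_DEFAULT_LINEAR_SCAN_THRESHOLD groups, so only report
         * the option when the current setting differs from that
         * size-based default.
         */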
3018         if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3019                         !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3020                 SEQ_OPTS_PUTS("mb_optimize_scan=0");
3021         } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3022                         test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3023                 SEQ_OPTS_PUTS("mb_optimize_scan=1");
3024         }
3025
3026         if (nodefs && !test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS))
3027                 SEQ_OPTS_PUTS("prefetch_block_bitmaps");
3028
3029         if (ext4_emergency_ro(sb))
3030                 SEQ_OPTS_PUTS("emergency_ro");
3031
3032         if (ext4_forced_shutdown(sb))
3033                 SEQ_OPTS_PUTS("shutdown");
3034
3035         ext4_show_quota_options(seq, sb);
3036         return 0;
3037 }
3038
3039 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3040 {
3041         return _ext4_show_options(seq, root->d_sb, 0);
3042 }
3043
3044 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3045 {
3046         struct super_block *sb = seq->private;
3047         int rc;
3048
3049         seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3050         rc = _ext4_show_options(seq, sb, 1);
3051         seq_putc(seq, '\n');
3052         return rc;
3053 }
3054
3055 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3056                             int read_only)
3057 {
3058         struct ext4_sb_info *sbi = EXT4_SB(sb);
3059         int err = 0;
3060
3061         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3062                 ext4_msg(sb, KERN_ERR, "revision level too high, "
3063                          "forcing read-only mode");
3064                 err = -EROFS;
3065                 goto done;
3066         }
3067         if (read_only)
3068                 goto done;
3069         if (!(sbi->s_mount_state & EXT4_VALID_FS))
3070                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3071                          "running e2fsck is recommended");
3072         else if (sbi->s_mount_state & EXT4_ERROR_FS)
3073                 ext4_msg(sb, KERN_WARNING,
3074                          "warning: mounting fs with errors, "
3075                          "running e2fsck is recommended");
3076         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3077                  le16_to_cpu(es->s_mnt_count) >=
3078                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3079                 ext4_msg(sb, KERN_WARNING,
3080                          "warning: maximal mount count reached, "
3081                          "running e2fsck is recommended");
3082         else if (le32_to_cpu(es->s_checkinterval) &&
3083                  (ext4_get_tstamp(es, s_lastcheck) +
3084                   le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3085                 ext4_msg(sb, KERN_WARNING,
3086                          "warning: checktime reached, "
3087                          "running e2fsck is recommended");
3088         if (!sbi->s_journal)
3089                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
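        /*
         * A stored s_max_mnt_count of 0 means "unset"; the default
         * EXT4_DFL_MAX_MNT_COUNT (65535, i.e. -1 as __s16) effectively
         * disables the mount-count based fsck warning above.
         */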
3090         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3091                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3092         le16_add_cpu(&es->s_mnt_count, 1);
3093         ext4_update_tstamp(es, s_mtime);
3094         if (sbi->s_journal) {
3095                 ext4_set_feature_journal_needs_recovery(sb);
3096                 if (ext4_has_feature_orphan_file(sb))
3097                         ext4_set_feature_orphan_present(sb);
3098         }
3099
3100         err = ext4_commit_super(sb);
3101 done:
3102         if (test_opt(sb, DEBUG))
3103                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3104                                 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3105                         sb->s_blocksize,
3106                         sbi->s_groups_count,
3107                         EXT4_BLOCKS_PER_GROUP(sb),
3108                         EXT4_INODES_PER_GROUP(sb),
3109                         sbi->s_mount_opt, sbi->s_mount_opt2);
3110         return err;
3111 }
3112
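/*
 * Ensure the RCU-protected s_flex_groups pointer array is large enough
 * to cover @ngroup groups. With s_log_groups_per_flex = n each flex
 * group aggregates 2^n block groups, so ext4_flex_group(ngroup - 1) + 1
 * entries are needed; growth swaps in a new array under RCU so
 * concurrent readers stay safe.
 */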
3113 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3114 {
3115         struct ext4_sb_info *sbi = EXT4_SB(sb);
3116         struct flex_groups **old_groups, **new_groups;
3117         int size, i, j;
3118
3119         if (!sbi->s_log_groups_per_flex)
3120                 return 0;
3121
3122         size = ext4_flex_group(sbi, ngroup - 1) + 1;
3123         if (size <= sbi->s_flex_groups_allocated)
3124                 return 0;
3125
3126         new_groups = kvzalloc(roundup_pow_of_two(size *
3127                               sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3128         if (!new_groups) {
3129                 ext4_msg(sb, KERN_ERR,
3130                          "not enough memory for %d flex group pointers", size);
3131                 return -ENOMEM;
3132         }
3133         for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3134                 new_groups[i] = kvzalloc(roundup_pow_of_two(
3135                                          sizeof(struct flex_groups)),
3136                                          GFP_KERNEL);
3137                 if (!new_groups[i]) {
3138                         for (j = sbi->s_flex_groups_allocated; j < i; j++)
3139                                 kvfree(new_groups[j]);
3140                         kvfree(new_groups);
3141                         ext4_msg(sb, KERN_ERR,
3142                                  "not enough memory for %d flex groups", size);
3143                         return -ENOMEM;
3144                 }
3145         }
3146         rcu_read_lock();
3147         old_groups = rcu_dereference(sbi->s_flex_groups);
3148         if (old_groups)
3149                 memcpy(new_groups, old_groups,
3150                        (sbi->s_flex_groups_allocated *
3151                         sizeof(struct flex_groups *)));
3152         rcu_read_unlock();
3153         rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3154         sbi->s_flex_groups_allocated = size;
3155         if (old_groups)
3156                 ext4_kvfree_array_rcu(old_groups);
3157         return 0;
3158 }
3159
3160 static int ext4_fill_flex_info(struct super_block *sb)
3161 {
3162         struct ext4_sb_info *sbi = EXT4_SB(sb);
3163         struct ext4_group_desc *gdp = NULL;
3164         struct flex_groups *fg;
3165         ext4_group_t flex_group;
3166         int i, err;
3167
3168         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3169         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3170                 sbi->s_log_groups_per_flex = 0;
3171                 return 1;
3172         }
3173
3174         err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3175         if (err)
3176                 goto failed;
3177
3178         for (i = 0; i < sbi->s_groups_count; i++) {
3179                 gdp = ext4_get_group_desc(sb, i, NULL);
3180
3181                 flex_group = ext4_flex_group(sbi, i);
3182                 fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3183                 atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3184                 atomic64_add(ext4_free_group_clusters(sb, gdp),
3185                              &fg->free_clusters);
3186                 atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3187         }
3188
3189         return 1;
3190 failed:
3191         return 0;
3192 }
3193
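/*
 * Compute the group descriptor checksum. With metadata_csum, crc32c is
 * taken over the fs checksum seed, the group number and the descriptor
 * (with the checksum field itself zeroed) and folded to 16 bits;
 * otherwise the legacy crc16 over the UUID, group number and descriptor
 * is used instead when the gdt_csum feature is enabled.
 */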
3194 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3195                                    struct ext4_group_desc *gdp)
3196 {
3197         int offset = offsetof(struct ext4_group_desc, bg_checksum);
3198         __u16 crc = 0;
3199         __le32 le_group = cpu_to_le32(block_group);
3200         struct ext4_sb_info *sbi = EXT4_SB(sb);
3201
3202         if (ext4_has_feature_metadata_csum(sbi->s_sb)) {
3203                 /* Use new metadata_csum algorithm */
3204                 __u32 csum32;
3205                 __u16 dummy_csum = 0;
3206
3207                 csum32 = ext4_chksum(sbi->s_csum_seed, (__u8 *)&le_group,
3208                                      sizeof(le_group));
3209                 csum32 = ext4_chksum(csum32, (__u8 *)gdp, offset);
3210                 csum32 = ext4_chksum(csum32, (__u8 *)&dummy_csum,
3211                                      sizeof(dummy_csum));
3212                 offset += sizeof(dummy_csum);
3213                 if (offset < sbi->s_desc_size)
3214                         csum32 = ext4_chksum(csum32, (__u8 *)gdp + offset,
3215                                              sbi->s_desc_size - offset);
3216
3217                 crc = csum32 & 0xFFFF;
3218                 goto out;
3219         }
3220
3221         /* old crc16 code */
3222         if (!ext4_has_feature_gdt_csum(sb))
3223                 return 0;
3224
3225         crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3226         crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3227         crc = crc16(crc, (__u8 *)gdp, offset);
3228         offset += sizeof(gdp->bg_checksum); /* skip checksum */
3229         /* for checksum of struct ext4_group_desc do the rest...*/
3230         if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3231                 crc = crc16(crc, (__u8 *)gdp + offset,
3232                             sbi->s_desc_size - offset);
3233
3234 out:
3235         return cpu_to_le16(crc);
3236 }
3237
3238 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3239                                 struct ext4_group_desc *gdp)
3240 {
3241         if (ext4_has_group_desc_csum(sb) &&
3242             (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3243                 return 0;
3244
3245         return 1;
3246 }
3247
3248 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3249                               struct ext4_group_desc *gdp)
3250 {
3251         if (!ext4_has_group_desc_csum(sb))
3252                 return;
3253         gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3254 }
3255
3256 /* Called at mount-time, super-block is locked */
3257 static int ext4_check_descriptors(struct super_block *sb,
3258                                   ext4_fsblk_t sb_block,
3259                                   ext4_group_t *first_not_zeroed)
3260 {
3261         struct ext4_sb_info *sbi = EXT4_SB(sb);
3262         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3263         ext4_fsblk_t last_block;
3264         ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3265         ext4_fsblk_t block_bitmap;
3266         ext4_fsblk_t inode_bitmap;
3267         ext4_fsblk_t inode_table;
3268         int flexbg_flag = 0;
3269         ext4_group_t i, grp = sbi->s_groups_count;
3270
3271         if (ext4_has_feature_flex_bg(sb))
3272                 flexbg_flag = 1;
3273
3274         ext4_debug("Checking group descriptors");
3275
3276         for (i = 0; i < sbi->s_groups_count; i++) {
3277                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3278
3279                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
3280                         last_block = ext4_blocks_count(sbi->s_es) - 1;
3281                 else
3282                         last_block = first_block +
3283                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
3284
3285                 if ((grp == sbi->s_groups_count) &&
3286                    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3287                         grp = i;
3288
3289                 block_bitmap = ext4_block_bitmap(sb, gdp);
3290                 if (block_bitmap == sb_block) {
3291                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3292                                  "Block bitmap for group %u overlaps "
3293                                  "superblock", i);
3294                         if (!sb_rdonly(sb))
3295                                 return 0;
3296                 }
3297                 if (block_bitmap >= sb_block + 1 &&
3298                     block_bitmap <= last_bg_block) {
3299                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3300                                  "Block bitmap for group %u overlaps "
3301                                  "block group descriptors", i);
3302                         if (!sb_rdonly(sb))
3303                                 return 0;
3304                 }
3305                 if (block_bitmap < first_block || block_bitmap > last_block) {
3306                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3307                                "Block bitmap for group %u not in group "
3308                                "(block %llu)!", i, block_bitmap);
3309                         return 0;
3310                 }
3311                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
3312                 if (inode_bitmap == sb_block) {
3313                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3314                                  "Inode bitmap for group %u overlaps "
3315                                  "superblock", i);
3316                         if (!sb_rdonly(sb))
3317                                 return 0;
3318                 }
3319                 if (inode_bitmap >= sb_block + 1 &&
3320                     inode_bitmap <= last_bg_block) {
3321                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3322                                  "Inode bitmap for group %u overlaps "
3323                                  "block group descriptors", i);
3324                         if (!sb_rdonly(sb))
3325                                 return 0;
3326                 }
3327                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
3328                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3329                                "Inode bitmap for group %u not in group "
3330                                "(block %llu)!", i, inode_bitmap);
3331                         return 0;
3332                 }
3333                 inode_table = ext4_inode_table(sb, gdp);
3334                 if (inode_table == sb_block) {
3335                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3336                                  "Inode table for group %u overlaps "
3337                                  "superblock", i);
3338                         if (!sb_rdonly(sb))
3339                                 return 0;
3340                 }
3341                 if (inode_table >= sb_block + 1 &&
3342                     inode_table <= last_bg_block) {
3343                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3344                                  "Inode table for group %u overlaps "
3345                                  "block group descriptors", i);
3346                         if (!sb_rdonly(sb))
3347                                 return 0;
3348                 }
3349                 if (inode_table < first_block ||
3350                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
3351                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3352                                "Inode table for group %u not in group "
3353                                "(block %llu)!", i, inode_table);
3354                         return 0;
3355                 }
3356                 ext4_lock_group(sb, i);
3357                 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3358                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3359                                  "Checksum for group %u failed (%u!=%u)",
3360                                  i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3361                                      gdp)), le16_to_cpu(gdp->bg_checksum));
3362                         if (!sb_rdonly(sb)) {
3363                                 ext4_unlock_group(sb, i);
3364                                 return 0;
3365                         }
3366                 }
3367                 ext4_unlock_group(sb, i);
3368                 if (!flexbg_flag)
3369                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
3370         }
3371         if (first_not_zeroed)
3372                 *first_not_zeroed = grp;
3373         return 1;
3374 }
3375
3376 /*
3377  * Maximal extent format file size.
3378  * Resulting logical blkno at s_maxbytes must fit in our on-disk
3379  * extent format containers, within a sector_t, and within i_blocks
3380  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
3381  * so that won't be a limiting factor.
3382  *
3383  * However, there is another limiting factor. We do store extents in the form
3384  * of starting block and length, hence the resulting length of the extent
3385  * covering maximum file size must fit into on-disk format containers as
3386  * well. Given that the length is always one unit bigger than the max unit
3387  * (because we count 0 as well), we have to lower s_maxbytes by one fs block.
3388  *
3389  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3390  */
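/*
 * For example, assuming 4KiB blocks, res is (2^32 - 1) << 12, i.e. one
 * block short of 16 TiB, which is the commonly quoted extent-mapped
 * file size limit for a 4KiB block size.
 */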
3391 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3392 {
3393         loff_t res;
3394         loff_t upper_limit = MAX_LFS_FILESIZE;
3395
3396         BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3397
3398         if (!has_huge_files) {
3399                 upper_limit = (1LL << 32) - 1;
3400
3401                 /* total blocks in file system block size */
3402                 upper_limit >>= (blkbits - 9);
3403                 upper_limit <<= blkbits;
3404         }
3405
3406         /*
3407          * 32-bit extent-start container, ee_block. We lower the maxbytes
3408          * by one fs block, so ee_len can cover the extent of maximum file
3409          * size
3410          */
3411         res = (1LL << 32) - 1;
3412         res <<= blkbits;
3413
3414         /* Sanity check against vm- & vfs- imposed limits */
3415         if (res > upper_limit)
3416                 res = upper_limit;
3417
3418         return res;
3419 }
3420
3421 /*
3422  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
3423  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3424  * We need to be 1 filesystem block less than the 2^48 sector limit.
3425  */
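/*
 * Worked example, assuming 4KiB blocks: ppb = 1024, so the block tree
 * addresses 12 + 1024 + 1024^2 + 1024^3 blocks, slightly over 4 TiB.
 * That is far below the 2^48-sector i_blocks limit, so the indirect
 * block tree, not i_blocks, is the binding constraint in this case.
 */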
3426 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3427 {
3428         loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3429         int meta_blocks;
3430         unsigned int ppb = 1 << (bits - 2);
3431
3432         /*
3433          * This is calculated to be the largest file size for a dense, block
3434          * mapped file such that the file's total number of 512-byte sectors,
3435          * including data and all indirect blocks, does not exceed (2^48 - 1).
3436          *
3437          * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
3438          * number of 512-byte sectors of the file.
3439          */
3440         if (!has_huge_files) {
3441                 /*
3442                  * !has_huge_files implies that the inode i_block field
3443                  * represents total file blocks in 2^32 512-byte sectors ==
3444                  * size of vfs inode i_blocks * 8
3445                  */
3446                 upper_limit = (1LL << 32) - 1;
3447
3448                 /* total blocks in file system block size */
3449                 upper_limit >>= (bits - 9);
3450
3451         } else {
3452                 /*
3453                  * We use the 48 bit ext4_inode i_blocks.
3454                  * With EXT4_HUGE_FILE_FL set, i_blocks
3455                  * represents the total number of blocks
3456                  * in file system block size units.
3457                  */
3458                 upper_limit = (1LL << 48) - 1;
3459
3460         }
3461
3462         /* Compute how many blocks we can address by block tree */
3463         res += ppb;
3464         res += ppb * ppb;
3465         res += ((loff_t)ppb) * ppb * ppb;
3466         /* Compute how many metadata blocks are needed */
3467         meta_blocks = 1;
3468         meta_blocks += 1 + ppb;
3469         meta_blocks += 1 + ppb + ppb * ppb;
3470         /* Does block tree limit file size? */
3471         if (res + meta_blocks <= upper_limit)
3472                 goto check_lfs;
3473
3474         res = upper_limit;
3475         /* How many metadata blocks are needed for addressing upper_limit? */
3476         upper_limit -= EXT4_NDIR_BLOCKS;
3477         /* indirect blocks */
3478         meta_blocks = 1;
3479         upper_limit -= ppb;
3480         /* double indirect blocks */
3481         if (upper_limit < ppb * ppb) {
3482                 meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3483                 res -= meta_blocks;
3484                 goto check_lfs;
3485         }
3486         meta_blocks += 1 + ppb;
3487         upper_limit -= ppb * ppb;
3488         /* triple indirect blocks for the rest */
3489         meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3490                 DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3491         res -= meta_blocks;
3492 check_lfs:
3493         res <<= bits;
3494         if (res > MAX_LFS_FILESIZE)
3495                 res = MAX_LFS_FILESIZE;
3496
3497         return res;
3498 }
3499
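/*
 * Return the disk block that holds group descriptor block @nr. Without
 * meta_bg (or below s_first_meta_bg) the descriptor blocks simply
 * follow the superblock; with meta_bg each descriptor block lives at
 * the start of the first block group it describes, just after any
 * backup superblock there.
 */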
3500 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3501                                    ext4_fsblk_t logical_sb_block, int nr)
3502 {
3503         struct ext4_sb_info *sbi = EXT4_SB(sb);
3504         ext4_group_t bg, first_meta_bg;
3505         int has_super = 0;
3506
3507         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3508
3509         if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3510                 return logical_sb_block + nr + 1;
3511         bg = sbi->s_desc_per_block * nr;
3512         if (ext4_bg_has_super(sb, bg))
3513                 has_super = 1;
3514
3515         /*
3516          * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3517          * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3518          * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3519          * compensate.
3520          */
3521         if (sb->s_blocksize == 1024 && nr == 0 &&
3522             le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3523                 has_super++;
3524
3525         return (has_super + ext4_group_first_block_no(sb, bg));
3526 }
3527
3528 /**
3529  * ext4_get_stripe_size: Get the stripe size.
3530  * @sbi: In memory super block info
3531  *
3532  * If the stripe size was specified via the mount option, use that value.
3533  * If the value specified at mount time is greater than the blocks per
3534  * group, use the super block values (stripe width, then stride). If
3535  * those are also greater than the blocks per group, return 0. The
3536  * allocator needs the stripe size to be less than the blocks per group.
3537  *
3538  */
3539 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3540 {
3541         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3542         unsigned long stripe_width =
3543                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3544         int ret;
3545
3546         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3547                 ret = sbi->s_stripe;
3548         else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3549                 ret = stripe_width;
3550         else if (stride && stride <= sbi->s_blocks_per_group)
3551                 ret = stride;
3552         else
3553                 ret = 0;
3554
3555         /*
3556          * A stripe size of 1 makes no sense, so
3557          * we set it to 0 to turn off the stripe handling code.
3558          */
3559         if (ret <= 1)
3560                 ret = 0;
3561
3562         return ret;
3563 }
3564
3565 /*
3566  * Check whether this filesystem can be mounted based on
3567  * the features present and the RDONLY/RDWR mount requested.
3568  * Returns 1 if this filesystem can be mounted as requested,
3569  * 0 if it cannot be.
3570  */
3571 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3572 {
3573         if (ext4_has_unknown_ext4_incompat_features(sb)) {
3574                 ext4_msg(sb, KERN_ERR,
3575                         "Couldn't mount because of "
3576                         "unsupported optional features (%x)",
3577                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3578                         ~EXT4_FEATURE_INCOMPAT_SUPP));
3579                 return 0;
3580         }
3581
3582         if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
3583                 ext4_msg(sb, KERN_ERR,
3584                          "Filesystem with casefold feature cannot be "
3585                          "mounted without CONFIG_UNICODE");
3586                 return 0;
3587         }
3588
3589         if (readonly)
3590                 return 1;
3591
3592         if (ext4_has_feature_readonly(sb)) {
3593                 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3594                 sb->s_flags |= SB_RDONLY;
3595                 return 1;
3596         }
3597
3598         /* Check that feature set is OK for a read-write mount */
3599         if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3600                 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3601                          "unsupported optional features (%x)",
3602                          (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3603                                 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3604                 return 0;
3605         }
3606         if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3607                 ext4_msg(sb, KERN_ERR,
3608                          "Can't support bigalloc feature without "
3609                          "extents feature\n");
3610                 return 0;
3611         }
3612
3613 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3614         if (!readonly && (ext4_has_feature_quota(sb) ||
3615                           ext4_has_feature_project(sb))) {
3616                 ext4_msg(sb, KERN_ERR,
3617                          "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3618                 return 0;
3619         }
3620 #endif  /* !CONFIG_QUOTA || !CONFIG_QFMT_V2 */
3621         return 1;
3622 }
3623
3624 /*
3625  * This function is called once a day if we have errors logged
3626  * on the file system
3627  */
3628 static void print_daily_error_info(struct timer_list *t)
3629 {
3630         struct ext4_sb_info *sbi = timer_container_of(sbi, t, s_err_report);
3631         struct super_block *sb = sbi->s_sb;
3632         struct ext4_super_block *es = sbi->s_es;
3633
3634         if (es->s_error_count)
3635                 /* fsck newer than v1.41.13 is needed to clean this condition. */
3636                 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3637                          le32_to_cpu(es->s_error_count));
3638         if (es->s_first_error_time) {
3639                 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3640                        sb->s_id,
3641                        ext4_get_tstamp(es, s_first_error_time),
3642                        (int) sizeof(es->s_first_error_func),
3643                        es->s_first_error_func,
3644                        le32_to_cpu(es->s_first_error_line));
3645                 if (es->s_first_error_ino)
3646                         printk(KERN_CONT ": inode %u",
3647                                le32_to_cpu(es->s_first_error_ino));
3648                 if (es->s_first_error_block)
3649                         printk(KERN_CONT ": block %llu", (unsigned long long)
3650                                le64_to_cpu(es->s_first_error_block));
3651                 printk(KERN_CONT "\n");
3652         }
3653         if (es->s_last_error_time) {
3654                 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3655                        sb->s_id,
3656                        ext4_get_tstamp(es, s_last_error_time),
3657                        (int) sizeof(es->s_last_error_func),
3658                        es->s_last_error_func,
3659                        le32_to_cpu(es->s_last_error_line));
3660                 if (es->s_last_error_ino)
3661                         printk(KERN_CONT ": inode %u",
3662                                le32_to_cpu(es->s_last_error_ino));
3663                 if (es->s_last_error_block)
3664                         printk(KERN_CONT ": block %llu", (unsigned long long)
3665                                le64_to_cpu(es->s_last_error_block));
3666                 printk(KERN_CONT "\n");
3667         }
3668         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3669 }
3670
3671 /* Find next suitable group and run ext4_init_inode_table */
3672 static int ext4_run_li_request(struct ext4_li_request *elr)
3673 {
3674         struct ext4_group_desc *gdp = NULL;
3675         struct super_block *sb = elr->lr_super;
3676         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3677         ext4_group_t group = elr->lr_next_group;
3678         unsigned int prefetch_ios = 0;
3679         int ret = 0;
3680         int nr = EXT4_SB(sb)->s_mb_prefetch;
3681         u64 start_time;
3682
3683         if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3684                 elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
3685                 ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
3686                 trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
3687                 if (group >= elr->lr_next_group) {
3688                         ret = 1;
3689                         if (elr->lr_first_not_zeroed != ngroups &&
3690                             !ext4_emergency_state(sb) && !sb_rdonly(sb) &&
3691                             test_opt(sb, INIT_INODE_TABLE)) {
3692                                 elr->lr_next_group = elr->lr_first_not_zeroed;
3693                                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3694                                 ret = 0;
3695                         }
3696                 }
3697                 return ret;
3698         }
3699
3700         for (; group < ngroups; group++) {
3701                 gdp = ext4_get_group_desc(sb, group, NULL);
3702                 if (!gdp) {
3703                         ret = 1;
3704                         break;
3705                 }
3706
3707                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3708                         break;
3709         }
3710
3711         if (group >= ngroups)
3712                 ret = 1;
3713
3714         if (!ret) {
3715                 start_time = ktime_get_ns();
3716                 ret = ext4_init_inode_table(sb, group,
3717                                             elr->lr_timeout ? 0 : 1);
3718                 trace_ext4_lazy_itable_init(sb, group);
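                /*
                 * lr_timeout == 0 means "not computed yet": derive the
                 * reschedule interval from the measured init time,
                 * scaled by s_li_wait_mult (EXT4_DEF_LI_WAIT_MULT, 10,
                 * by default), so lazy init only consumes a fraction
                 * of the disk bandwidth.
                 */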
3719                 if (elr->lr_timeout == 0) {
3720                         elr->lr_timeout = nsecs_to_jiffies((ktime_get_ns() - start_time) *
3721                                 EXT4_SB(elr->lr_super)->s_li_wait_mult);
3722                 }
3723                 elr->lr_next_sched = jiffies + elr->lr_timeout;
3724                 elr->lr_next_group = group + 1;
3725         }
3726         return ret;
3727 }
3728
3729 /*
3730  * Remove lr_request from the request list and free the
3731  * request structure. Should be called with li_list_mtx held.
3732  */
3733 static void ext4_remove_li_request(struct ext4_li_request *elr)
3734 {
3735         if (!elr)
3736                 return;
3737
3738         list_del(&elr->lr_request);
3739         EXT4_SB(elr->lr_super)->s_li_request = NULL;
3740         kfree(elr);
3741 }
3742
3743 static void ext4_unregister_li_request(struct super_block *sb)
3744 {
3745         mutex_lock(&ext4_li_mtx);
3746         if (!ext4_li_info) {
3747                 mutex_unlock(&ext4_li_mtx);
3748                 return;
3749         }
3750
3751         mutex_lock(&ext4_li_info->li_list_mtx);
3752         ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3753         mutex_unlock(&ext4_li_info->li_list_mtx);
3754         mutex_unlock(&ext4_li_mtx);
3755 }
3756
3757 static struct task_struct *ext4_lazyinit_task;
3758
3759 /*
3760  * This is the function where the ext4lazyinit thread lives. It walks
3761  * through the request list searching for the next scheduled filesystem.
3762  * When such a fs is found, run the lazy initialization request
3763  * (ext4_run_li_request) and keep track of the time spent in this
3764  * function. Based on that time we compute the next schedule time of
3765  * the request. When walking through the list is complete, compute the
3766  * next wakeup time and put the thread to sleep.
3767  */
3768 static int ext4_lazyinit_thread(void *arg)
3769 {
3770         struct ext4_lazy_init *eli = arg;
3771         struct list_head *pos, *n;
3772         struct ext4_li_request *elr;
3773         unsigned long next_wakeup, cur;
3774
3775         BUG_ON(!eli);
3776         set_freezable();
3777
3778 cont_thread:
3779         while (true) {
3780                 bool next_wakeup_initialized = false;
3781
3782                 next_wakeup = 0;
3783                 mutex_lock(&eli->li_list_mtx);
3784                 if (list_empty(&eli->li_request_list)) {
3785                         mutex_unlock(&eli->li_list_mtx);
3786                         goto exit_thread;
3787                 }
3788                 list_for_each_safe(pos, n, &eli->li_request_list) {
3789                         int err = 0;
3790                         int progress = 0;
3791                         elr = list_entry(pos, struct ext4_li_request,
3792                                          lr_request);
3793
3794                         if (time_before(jiffies, elr->lr_next_sched)) {
3795                                 if (!next_wakeup_initialized ||
3796                                     time_before(elr->lr_next_sched, next_wakeup)) {
3797                                         next_wakeup = elr->lr_next_sched;
3798                                         next_wakeup_initialized = true;
3799                                 }
3800                                 continue;
3801                         }
3802                         if (down_read_trylock(&elr->lr_super->s_umount)) {
3803                                 if (sb_start_write_trylock(elr->lr_super)) {
3804                                         progress = 1;
3805                                         /*
3806                                          * We hold sb->s_umount, sb can not
3807                                          * be removed from the list, it is
3808                                          * now safe to drop li_list_mtx
3809                                          */
3810                                         mutex_unlock(&eli->li_list_mtx);
3811                                         err = ext4_run_li_request(elr);
3812                                         sb_end_write(elr->lr_super);
3813                                         mutex_lock(&eli->li_list_mtx);
3814                                         n = pos->next;
3815                                 }
3816                                 up_read((&elr->lr_super->s_umount));
3817                         }
3818                         /* error, remove the lazy_init job */
3819                         if (err) {
3820                                 ext4_remove_li_request(elr);
3821                                 continue;
3822                         }
3823                         if (!progress) {
3824                                 elr->lr_next_sched = jiffies +
3825                                         get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3826                         }
3827                         if (!next_wakeup_initialized ||
3828                             time_before(elr->lr_next_sched, next_wakeup)) {
3829                                 next_wakeup = elr->lr_next_sched;
3830                                 next_wakeup_initialized = true;
3831                         }
3832                 }
3833                 mutex_unlock(&eli->li_list_mtx);
3834
3835                 try_to_freeze();
3836
3837                 cur = jiffies;
3838                 if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) {
3839                         cond_resched();
3840                         continue;
3841                 }
3842
3843                 schedule_timeout_interruptible(next_wakeup - cur);
3844
3845                 if (kthread_should_stop()) {
3846                         ext4_clear_request_list();
3847                         goto exit_thread;
3848                 }
3849         }
3850
3851 exit_thread:
3852         /*
3853          * It looks like the request list is empty, but we need
3854          * to check it under the li_list_mtx lock, to prevent any
3855          * additions into it, and of course we should lock ext4_li_mtx
3856          * to atomically free the list and ext4_li_info, because at
3857  * this point another ext4 filesystem could be registering a
3858  * new one.
3859          */
3860         mutex_lock(&ext4_li_mtx);
3861         mutex_lock(&eli->li_list_mtx);
3862         if (!list_empty(&eli->li_request_list)) {
3863                 mutex_unlock(&eli->li_list_mtx);
3864                 mutex_unlock(&ext4_li_mtx);
3865                 goto cont_thread;
3866         }
3867         mutex_unlock(&eli->li_list_mtx);
3868         kfree(ext4_li_info);
3869         ext4_li_info = NULL;
3870         mutex_unlock(&ext4_li_mtx);
3871
3872         return 0;
3873 }
3874
3875 static void ext4_clear_request_list(void)
3876 {
3877         struct list_head *pos, *n;
3878         struct ext4_li_request *elr;
3879
3880         mutex_lock(&ext4_li_info->li_list_mtx);
3881         list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3882                 elr = list_entry(pos, struct ext4_li_request,
3883                                  lr_request);
3884                 ext4_remove_li_request(elr);
3885         }
3886         mutex_unlock(&ext4_li_info->li_list_mtx);
3887 }
3888
3889 static int ext4_run_lazyinit_thread(void)
3890 {
3891         ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3892                                          ext4_li_info, "ext4lazyinit");
3893         if (IS_ERR(ext4_lazyinit_task)) {
3894                 int err = PTR_ERR(ext4_lazyinit_task);
3895                 ext4_clear_request_list();
3896                 kfree(ext4_li_info);
3897                 ext4_li_info = NULL;
3898                 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3899                                  "initialization thread\n",
3900                                  err);
3901                 return err;
3902         }
3903         ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3904         return 0;
3905 }
3906
3907 /*
3908  * Check whether it makes sense to run the itable init thread or not.
3909  * If there is at least one uninitialized inode table, return the
3910  * corresponding group number, else the loop goes through all
3911  * groups and returns the total number of groups.
3912  */
3913 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3914 {
3915         ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3916         struct ext4_group_desc *gdp = NULL;
3917
3918         if (!ext4_has_group_desc_csum(sb))
3919                 return ngroups;
3920
3921         for (group = 0; group < ngroups; group++) {
3922                 gdp = ext4_get_group_desc(sb, group, NULL);
3923                 if (!gdp)
3924                         continue;
3925
3926                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3927                         break;
3928         }
3929
3930         return group;
3931 }
3932
3933 static int ext4_li_info_new(void)
3934 {
3935         struct ext4_lazy_init *eli = NULL;
3936
3937         eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3938         if (!eli)
3939                 return -ENOMEM;
3940
3941         INIT_LIST_HEAD(&eli->li_request_list);
3942         mutex_init(&eli->li_list_mtx);
3943
3944         eli->li_state |= EXT4_LAZYINIT_QUIT;
3945
3946         ext4_li_info = eli;
3947
3948         return 0;
3949 }
3950
3951 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3952                                             ext4_group_t start)
3953 {
3954         struct ext4_li_request *elr;
3955
3956         elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3957         if (!elr)
3958                 return NULL;
3959
3960         elr->lr_super = sb;
3961         elr->lr_first_not_zeroed = start;
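        /*
         * Unless block bitmap prefetching is disabled, start in
         * prefetch mode; ext4_run_li_request() flips the request over
         * to itable zeroing once the prefetch pass has covered every
         * group.
         */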
3962         if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3963                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3964                 elr->lr_next_group = start;
3965         } else {
3966                 elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3967         }
3968
3969         /*
3970          * Randomize the first schedule time of the request
3971          * to better spread out the inode table
3972          * initialization requests.
3973          */
3974         elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3975         return elr;
3976 }
3977
3978 int ext4_register_li_request(struct super_block *sb,
3979                              ext4_group_t first_not_zeroed)
3980 {
3981         struct ext4_sb_info *sbi = EXT4_SB(sb);
3982         struct ext4_li_request *elr = NULL;
3983         ext4_group_t ngroups = sbi->s_groups_count;
3984         int ret = 0;
3985
3986         mutex_lock(&ext4_li_mtx);
3987         if (sbi->s_li_request != NULL) {
3988                 /*
3989                  * Reset timeout so it can be computed again, because
3990                  * s_li_wait_mult might have changed.
3991                  */
3992                 sbi->s_li_request->lr_timeout = 0;
3993                 goto out;
3994         }
3995
3996         if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
3997             (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3998              (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
3999                 goto out;
4000
4001         elr = ext4_li_request_new(sb, first_not_zeroed);
4002         if (!elr) {
4003                 ret = -ENOMEM;
4004                 goto out;
4005         }
4006
4007         if (NULL == ext4_li_info) {
4008                 ret = ext4_li_info_new();
4009                 if (ret)
4010                         goto out;
4011         }
4012
4013         mutex_lock(&ext4_li_info->li_list_mtx);
4014         list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4015         mutex_unlock(&ext4_li_info->li_list_mtx);
4016
4017         sbi->s_li_request = elr;
4018         /*
4019          * Set elr to NULL here since it has been inserted into
4020          * the request_list; its removal and freeing are handled
4021          * by ext4_clear_request_list from now on.
4022          */
4023         elr = NULL;
4024
4025         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4026                 ret = ext4_run_lazyinit_thread();
4027                 if (ret)
4028                         goto out;
4029         }
4030 out:
4031         mutex_unlock(&ext4_li_mtx);
4032         if (ret)
4033                 kfree(elr);
4034         return ret;
4035 }
4036
4037 /*
4038  * We do not need to lock anything since this is called on
4039  * module unload.
4040  */
4041 static void ext4_destroy_lazyinit_thread(void)
4042 {
4043         /*
4044          * If the thread exited earlier,
4045          * there's nothing to be done.
4046          */
4047         if (!ext4_li_info || !ext4_lazyinit_task)
4048                 return;
4049
4050         kthread_stop(ext4_lazyinit_task);
4051 }
4052
4053 static int set_journal_csum_feature_set(struct super_block *sb)
4054 {
4055         int ret = 1;
4056         int compat, incompat;
4057         struct ext4_sb_info *sbi = EXT4_SB(sb);
4058
4059         if (ext4_has_feature_metadata_csum(sb)) {
4060                 /* journal checksum v3 */
4061                 compat = 0;
4062                 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4063         } else {
4064                 /* journal checksum v1 */
4065                 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4066                 incompat = 0;
4067         }
4068
4069         jbd2_journal_clear_features(sbi->s_journal,
4070                         JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4071                         JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4072                         JBD2_FEATURE_INCOMPAT_CSUM_V2);
4073         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4074                 ret = jbd2_journal_set_features(sbi->s_journal,
4075                                 compat, 0,
4076                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4077                                 incompat);
4078         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4079                 ret = jbd2_journal_set_features(sbi->s_journal,
4080                                 compat, 0,
4081                                 incompat);
4082                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4083                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4084         } else {
4085                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4086                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4087         }
4088
4089         return ret;
4090 }
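
/*
 * Editor's summary of the selection above: metadata_csum selects
 * journal checksum v3 (the CSUM_V3 incompat flag), anything else falls
 * back to v1 (the COMPAT_CHECKSUM flag).  journal_async_commit
 * additionally sets the ASYNC_COMMIT incompat flag, plain
 * journal_checksum clears it, and with neither option set all of the
 * checksum features stay cleared (from the initial clear) and
 * ASYNC_COMMIT is cleared as well.
 */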
4091
4092 /*
4093  * Note: calculating the overhead so we can be compatible with
4094  * historical BSD practice is quite difficult in the face of
4095  * clusters/bigalloc.  This is because multiple metadata blocks from
4096  * different block group can end up in the same allocation cluster.
4097  * Calculating the exact overhead in the face of clustered allocation
4098  * requires either O(all block bitmaps) in memory or O(number of block
4099  * groups**2) in time.  We will still calculate the superblock for
4100  * older file systems --- and if we come across with a bigalloc file
4101  * system with zero in s_overhead_clusters the estimate will be close to
4102  * correct especially for very large cluster sizes --- but for newer
4103  * file systems, it's better to calculate this figure once at mkfs
4104  * time, and store it in the superblock.  If the superblock value is
4105  * present (even for non-bigalloc file systems), we will use it.
4106  */
4107 static int count_overhead(struct super_block *sb, ext4_group_t grp,
4108                           char *buf)
4109 {
4110         struct ext4_sb_info     *sbi = EXT4_SB(sb);
4111         struct ext4_group_desc  *gdp;
4112         ext4_fsblk_t            first_block, last_block, b;
4113         ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
4114         int                     s, j, count = 0;
4115         int                     has_super = ext4_bg_has_super(sb, grp);
4116
4117         if (!ext4_has_feature_bigalloc(sb))
4118                 return (has_super + ext4_bg_num_gdb(sb, grp) +
4119                         (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4120                         sbi->s_itb_per_group + 2);
4121
4122         first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4123                 (grp * EXT4_BLOCKS_PER_GROUP(sb));
4124         last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4125         for (i = 0; i < ngroups; i++) {
4126                 gdp = ext4_get_group_desc(sb, i, NULL);
4127                 b = ext4_block_bitmap(sb, gdp);
4128                 if (b >= first_block && b <= last_block) {
4129                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4130                         count++;
4131                 }
4132                 b = ext4_inode_bitmap(sb, gdp);
4133                 if (b >= first_block && b <= last_block) {
4134                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4135                         count++;
4136                 }
4137                 b = ext4_inode_table(sb, gdp);
4138                 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4139                         for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4140                                 int c = EXT4_B2C(sbi, b - first_block);
4141                                 ext4_set_bit(c, buf);
4142                                 count++;
4143                         }
4144                 if (i != grp)
4145                         continue;
4146                 s = 0;
4147                 if (ext4_bg_has_super(sb, grp)) {
4148                         ext4_set_bit(s++, buf);
4149                         count++;
4150                 }
4151                 j = ext4_bg_num_gdb(sb, grp);
4152                 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4153                         ext4_error(sb, "Invalid number of block group "
4154                                    "descriptor blocks: %d", j);
4155                         j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4156                 }
4157                 count += j;
4158                 for (; j > 0; j--)
4159                         ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4160         }
4161         if (!count)
4162                 return 0;
4163         return EXT4_CLUSTERS_PER_GROUP(sb) -
4164                 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4165 }
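
/*
 * Worked example for the non-bigalloc fast path above (editor's
 * illustration with hypothetical geometry): for a group that carries a
 * superblock backup, the overhead is has_super (1) + the group
 * descriptor blocks + the reserved GDT blocks + the inode table blocks
 * + 2 bitmap blocks.  With 1 descriptor block, 256 reserved GDT blocks
 * and 512 inode table blocks that is 1 + 1 + 256 + 512 + 2 = 772
 * blocks of overhead for the group.
 */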
4166
4167 /*
4168  * Compute the overhead and stash it in sbi->s_overhead
4169  */
4170 int ext4_calculate_overhead(struct super_block *sb)
4171 {
4172         struct ext4_sb_info *sbi = EXT4_SB(sb);
4173         struct ext4_super_block *es = sbi->s_es;
4174         struct inode *j_inode;
4175         unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4176         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4177         ext4_fsblk_t overhead = 0;
4178         char *buf = (char *) get_zeroed_page(GFP_NOFS);
4179
4180         if (!buf)
4181                 return -ENOMEM;
4182
4183         /*
4184          * Compute the overhead (FS structures).  This is constant
4185          * for a given filesystem unless the number of block groups
4186          * changes, so we cache the previous value until it does.
4187          */
4188
4189         /*
4190          * All of the blocks before first_data_block are overhead
4191          */
4192         overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4193
4194         /*
4195          * Add the overhead found in each block group
4196          */
4197         for (i = 0; i < ngroups; i++) {
4198                 int blks;
4199
4200                 blks = count_overhead(sb, i, buf);
4201                 overhead += blks;
4202                 if (blks)
4203                         memset(buf, 0, PAGE_SIZE);
4204                 cond_resched();
4205         }
4206
4207         /*
4208          * Add the internal journal blocks whether the journal has been
4209          * loaded or not
4210          */
4211         if (sbi->s_journal && !sbi->s_journal_bdev_file)
4212                 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4213         else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4214                 /* j_inum for internal journal is non-zero */
4215                 j_inode = ext4_get_journal_inode(sb, j_inum);
4216                 if (!IS_ERR(j_inode)) {
4217                         j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4218                         overhead += EXT4_NUM_B2C(sbi, j_blocks);
4219                         iput(j_inode);
4220                 } else {
4221                         ext4_msg(sb, KERN_ERR, "can't get journal size");
4222                 }
4223         }
4224         sbi->s_overhead = overhead;
4225         smp_wmb();
4226         free_page((unsigned long) buf);
4227         return 0;
4228 }
4229
4230 static void ext4_set_resv_clusters(struct super_block *sb)
4231 {
4232         ext4_fsblk_t resv_clusters;
4233         struct ext4_sb_info *sbi = EXT4_SB(sb);
4234
4235         /*
4236          * There's no need to reserve anything when we aren't using extents.
4237          * The space estimates are exact, there are no unwritten extents,
4238          * hole punching doesn't need new metadata... This is needed especially
4239          * to keep ext2/3 backward compatibility.
4240          */
4241         if (!ext4_has_feature_extents(sb))
4242                 return;
4243         /*
4244          * By default we reserve 2% or 4096 clusters, whichever is smaller.
4245          * This should cover the situations where we cannot afford to run
4246          * out of space, such as punching a hole or converting
4247          * unwritten extents in the delalloc path. In most cases such an
4248          * allocation would require 1 or 2 blocks; higher numbers are
4249          * very rare.
4250          */
4251         resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4252                          sbi->s_cluster_bits);
4253
4254         do_div(resv_clusters, 50);
4255         resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4256
4257         atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4258 }
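
/*
 * Worked example (editor's illustration): do_div(resv_clusters, 50)
 * is the 2% figure.  On a hypothetical 1 TiB filesystem with 4 KiB
 * clusters there are 268435456 clusters; 2% of that is 5368709, which
 * min_t() clamps to the 4096-cluster cap.  Only filesystems smaller
 * than 4096 * 50 clusters (~800 MiB at 4 KiB) reserve less than 4096.
 */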
4259
4260 static const char *ext4_quota_mode(struct super_block *sb)
4261 {
4262 #ifdef CONFIG_QUOTA
4263         if (!ext4_quota_capable(sb))
4264                 return "none";
4265
4266         if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
4267                 return "journalled";
4268         else
4269                 return "writeback";
4270 #else
4271         return "disabled";
4272 #endif
4273 }
4274
4275 static void ext4_setup_csum_trigger(struct super_block *sb,
4276                                     enum ext4_journal_trigger_type type,
4277                                     void (*trigger)(
4278                                         struct jbd2_buffer_trigger_type *type,
4279                                         struct buffer_head *bh,
4280                                         void *mapped_data,
4281                                         size_t size))
4282 {
4283         struct ext4_sb_info *sbi = EXT4_SB(sb);
4284
4285         sbi->s_journal_triggers[type].sb = sb;
4286         sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4287 }
4288
4289 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4290 {
4291         if (!sbi)
4292                 return;
4293
4294         kfree(sbi->s_blockgroup_lock);
4295         fs_put_dax(sbi->s_daxdev, NULL);
4296         kfree(sbi);
4297 }
4298
4299 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4300 {
4301         struct ext4_sb_info *sbi;
4302
4303         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4304         if (!sbi)
4305                 return NULL;
4306
4307         sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4308                                            NULL, NULL);
4309
4310         sbi->s_blockgroup_lock =
4311                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4312
4313         if (!sbi->s_blockgroup_lock)
4314                 goto err_out;
4315
4316         sb->s_fs_info = sbi;
4317         sbi->s_sb = sb;
4318         return sbi;
4319 err_out:
4320         fs_put_dax(sbi->s_daxdev, NULL);
4321         kfree(sbi);
4322         return NULL;
4323 }
4324
4325 static void ext4_set_def_opts(struct super_block *sb,
4326                               struct ext4_super_block *es)
4327 {
4328         unsigned long def_mount_opts;
4329
4330         /* Set defaults before we parse the mount options */
4331         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4332         set_opt(sb, INIT_INODE_TABLE);
4333         if (def_mount_opts & EXT4_DEFM_DEBUG)
4334                 set_opt(sb, DEBUG);
4335         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4336                 set_opt(sb, GRPID);
4337         if (def_mount_opts & EXT4_DEFM_UID16)
4338                 set_opt(sb, NO_UID32);
4339         /* xattr user namespace & acls are now defaulted on */
4340         set_opt(sb, XATTR_USER);
4341 #ifdef CONFIG_EXT4_FS_POSIX_ACL
4342         set_opt(sb, POSIX_ACL);
4343 #endif
4344         if (ext4_has_feature_fast_commit(sb))
4345                 set_opt2(sb, JOURNAL_FAST_COMMIT);
4346         /* don't forget to enable journal_csum when metadata_csum is enabled. */
4347         if (ext4_has_feature_metadata_csum(sb))
4348                 set_opt(sb, JOURNAL_CHECKSUM);
4349
4350         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4351                 set_opt(sb, JOURNAL_DATA);
4352         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4353                 set_opt(sb, ORDERED_DATA);
4354         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4355                 set_opt(sb, WRITEBACK_DATA);
4356
4357         if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4358                 set_opt(sb, ERRORS_PANIC);
4359         else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4360                 set_opt(sb, ERRORS_CONT);
4361         else
4362                 set_opt(sb, ERRORS_RO);
4363         /* block_validity enabled by default; disable with noblock_validity */
4364         set_opt(sb, BLOCK_VALIDITY);
4365         if (def_mount_opts & EXT4_DEFM_DISCARD)
4366                 set_opt(sb, DISCARD);
4367
4368         if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4369                 set_opt(sb, BARRIER);
4370
4371         /*
4372          * Enable delayed allocation by default.
4373          * Use -o nodelalloc to turn it off.
4374          */
4375         if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4376             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4377                 set_opt(sb, DELALLOC);
4378
4379         if (sb->s_blocksize <= PAGE_SIZE)
4380                 set_opt(sb, DIOREAD_NOLOCK);
4381 }
4382
4383 static int ext4_handle_clustersize(struct super_block *sb)
4384 {
4385         struct ext4_sb_info *sbi = EXT4_SB(sb);
4386         struct ext4_super_block *es = sbi->s_es;
4387         int clustersize;
4388
4389         /* Handle clustersize */
4390         clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4391         if (ext4_has_feature_bigalloc(sb)) {
4392                 if (clustersize < sb->s_blocksize) {
4393                         ext4_msg(sb, KERN_ERR,
4394                                  "cluster size (%d) smaller than "
4395                                  "block size (%lu)", clustersize, sb->s_blocksize);
4396                         return -EINVAL;
4397                 }
4398                 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4399                         le32_to_cpu(es->s_log_block_size);
4400         } else {
4401                 if (clustersize != sb->s_blocksize) {
4402                         ext4_msg(sb, KERN_ERR,
4403                                  "fragment/cluster size (%d) != "
4404                                  "block size (%lu)", clustersize, sb->s_blocksize);
4405                         return -EINVAL;
4406                 }
4407                 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4408                         ext4_msg(sb, KERN_ERR,
4409                                  "#blocks per group too big: %lu",
4410                                  sbi->s_blocks_per_group);
4411                         return -EINVAL;
4412                 }
4413                 sbi->s_cluster_bits = 0;
4414         }
4415         sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
4416         if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4417                 ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
4418                          sbi->s_clusters_per_group);
4419                 return -EINVAL;
4420         }
4421         if (sbi->s_blocks_per_group !=
4422             (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4423                 ext4_msg(sb, KERN_ERR,
4424                          "blocks per group (%lu) and clusters per group (%lu) inconsistent",
4425                          sbi->s_blocks_per_group, sbi->s_clusters_per_group);
4426                 return -EINVAL;
4427         }
4428         sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4429
4430         /* Do we have the standard group size of clustersize * 8 blocks? */
4431         if (sbi->s_blocks_per_group == clustersize << 3)
4432                 set_opt2(sb, STD_GROUP_SIZE);
4433
4434         return 0;
4435 }
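
/*
 * Worked example (editor's illustration): on a hypothetical bigalloc
 * filesystem with 4 KiB blocks and 64 KiB clusters, s_cluster_bits is
 * log2(65536/1024) - log2(4096/1024) = 6 - 2 = 4 and s_cluster_ratio
 * is 65536 / 4096 = 16, so the consistency check above requires
 * blocks_per_group == clusters_per_group * 16, e.g. 32768 blocks for
 * 2048 clusters.
 */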
4436
4437 /*
4438  * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
4439  * On a non-bigalloc filesystem the awu will be based upon the filesystem
4440  * blocksize & bdev awu units.
4441  * With bigalloc it will be based upon the bigalloc cluster size & bdev awu units.
4442  * @sb: super block
4443  */
4444 static void ext4_atomic_write_init(struct super_block *sb)
4445 {
4446         struct ext4_sb_info *sbi = EXT4_SB(sb);
4447         struct block_device *bdev = sb->s_bdev;
4448         unsigned int clustersize = EXT4_CLUSTER_SIZE(sb);
4449
4450         if (!bdev_can_atomic_write(bdev))
4451                 return;
4452
4453         if (!ext4_has_feature_extents(sb))
4454                 return;
4455
4456         sbi->s_awu_min = max(sb->s_blocksize,
4457                               bdev_atomic_write_unit_min_bytes(bdev));
4458         sbi->s_awu_max = min(clustersize,
4459                               bdev_atomic_write_unit_max_bytes(bdev));
4460         if (sbi->s_awu_min && sbi->s_awu_max &&
4461             sbi->s_awu_min <= sbi->s_awu_max) {
4462                 ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u",
4463                          sbi->s_awu_min, sbi->s_awu_max);
4464         } else {
4465                 sbi->s_awu_min = 0;
4466                 sbi->s_awu_max = 0;
4467         }
4468 }
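
/*
 * Worked example (editor's illustration with hypothetical device
 * limits): with 4 KiB filesystem blocks on a bdev advertising
 * awu_min = 512 and awu_max = 65536 bytes, s_awu_min becomes
 * max(4096, 512) = 4096 and s_awu_max becomes min(EXT4_CLUSTER_SIZE(sb),
 * 65536), i.e. min(4096, 65536) = 4096 on a non-bigalloc filesystem,
 * so only single-block atomic writes are advertised there.
 */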
4469
4470 static void ext4_fast_commit_init(struct super_block *sb)
4471 {
4472         struct ext4_sb_info *sbi = EXT4_SB(sb);
4473
4474         /* Initialize fast commit stuff */
4475         atomic_set(&sbi->s_fc_subtid, 0);
4476         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4477         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4478         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4479         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4480         sbi->s_fc_bytes = 0;
4481         ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4482         sbi->s_fc_ineligible_tid = 0;
4483         mutex_init(&sbi->s_fc_lock);
4484         memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4485         sbi->s_fc_replay_state.fc_regions = NULL;
4486         sbi->s_fc_replay_state.fc_regions_size = 0;
4487         sbi->s_fc_replay_state.fc_regions_used = 0;
4488         sbi->s_fc_replay_state.fc_regions_valid = 0;
4489         sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4490         sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4491         sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4492 }
4493
4494 static int ext4_inode_info_init(struct super_block *sb,
4495                                 struct ext4_super_block *es)
4496 {
4497         struct ext4_sb_info *sbi = EXT4_SB(sb);
4498
4499         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4500                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4501                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4502         } else {
4503                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4504                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4505                 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4506                         ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4507                                  sbi->s_first_ino);
4508                         return -EINVAL;
4509                 }
4510                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4511                     (!is_power_of_2(sbi->s_inode_size)) ||
4512                     (sbi->s_inode_size > sb->s_blocksize)) {
4513                         ext4_msg(sb, KERN_ERR,
4514                                "unsupported inode size: %d",
4515                                sbi->s_inode_size);
4516                         ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4517                         return -EINVAL;
4518                 }
4519                 /*
4520                  * i_atime_extra is the last extra field available for
4521                  * [acm]times in struct ext4_inode. Checking for that
4522                  * field should suffice to ensure we have extra space
4523                  * for all three.
4524                  */
4525                 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4526                         sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4527                         sb->s_time_gran = 1;
4528                         sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4529                 } else {
4530                         sb->s_time_gran = NSEC_PER_SEC;
4531                         sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4532                 }
4533                 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4534         }
4535
4536         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4537                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4538                         EXT4_GOOD_OLD_INODE_SIZE;
4539                 if (ext4_has_feature_extra_isize(sb)) {
4540                         unsigned v, max = (sbi->s_inode_size -
4541                                            EXT4_GOOD_OLD_INODE_SIZE);
4542
4543                         v = le16_to_cpu(es->s_want_extra_isize);
4544                         if (v > max) {
4545                                 ext4_msg(sb, KERN_ERR,
4546                                          "bad s_want_extra_isize: %d", v);
4547                                 return -EINVAL;
4548                         }
4549                         if (sbi->s_want_extra_isize < v)
4550                                 sbi->s_want_extra_isize = v;
4551
4552                         v = le16_to_cpu(es->s_min_extra_isize);
4553                         if (v > max) {
4554                                 ext4_msg(sb, KERN_ERR,
4555                                          "bad s_min_extra_isize: %d", v);
4556                                 return -EINVAL;
4557                         }
4558                         if (sbi->s_want_extra_isize < v)
4559                                 sbi->s_want_extra_isize = v;
4560                 }
4561         }
4562
4563         return 0;
4564 }
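
/*
 * Editor's note (illustrative): the timestamp check above needs
 * s_inode_size >= offsetof(i_atime_extra) + 4, i.e. 144 bytes with the
 * current on-disk layout.  The common 256-byte inode therefore gets
 * 1 ns granularity and the extended (post-2038) s_time_max, while the
 * old 128-byte inode falls back to one-second granularity and the
 * 32-bit EXT4_NON_EXTRA_TIMESTAMP_MAX limit.
 */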
4565
4566 #if IS_ENABLED(CONFIG_UNICODE)
4567 static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4568 {
4569         const struct ext4_sb_encodings *encoding_info;
4570         struct unicode_map *encoding;
4571         __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4572
4573         if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4574                 return 0;
4575
4576         encoding_info = ext4_sb_read_encoding(es);
4577         if (!encoding_info) {
4578                 ext4_msg(sb, KERN_ERR,
4579                         "Encoding requested by superblock is unknown");
4580                 return -EINVAL;
4581         }
4582
4583         encoding = utf8_load(encoding_info->version);
4584         if (IS_ERR(encoding)) {
4585                 ext4_msg(sb, KERN_ERR,
4586                         "can't mount with superblock charset: %s-%u.%u.%u "
4587                         "not supported by the kernel. flags: 0x%x.",
4588                         encoding_info->name,
4589                         unicode_major(encoding_info->version),
4590                         unicode_minor(encoding_info->version),
4591                         unicode_rev(encoding_info->version),
4592                         encoding_flags);
4593                 return -EINVAL;
4594         }
4595         ext4_msg(sb, KERN_INFO, "Using encoding defined by superblock: "
4596                 "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4597                 unicode_major(encoding_info->version),
4598                 unicode_minor(encoding_info->version),
4599                 unicode_rev(encoding_info->version),
4600                 encoding_flags);
4601
4602         sb->s_encoding = encoding;
4603         sb->s_encoding_flags = encoding_flags;
4604
4605         return 0;
4606 }
4607 #else
4608 static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4609 {
4610         return 0;
4611 }
4612 #endif
4613
4614 static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4615 {
4616         struct ext4_sb_info *sbi = EXT4_SB(sb);
4617
4618         /* Warn if metadata_csum and gdt_csum are both set. */
4619         if (ext4_has_feature_metadata_csum(sb) &&
4620             ext4_has_feature_gdt_csum(sb))
4621                 ext4_warning(sb, "metadata_csum and uninit_bg are "
4622                              "redundant flags; please run fsck.");
4623
4624         /* Check for a known checksum algorithm */
4625         if (!ext4_verify_csum_type(sb, es)) {
4626                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4627                          "unknown checksum algorithm.");
4628                 return -EINVAL;
4629         }
4630         ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4631                                 ext4_orphan_file_block_trigger);
4632
4633         /* Check superblock checksum */
4634         if (!ext4_superblock_csum_verify(sb, es)) {
4635                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4636                          "invalid superblock checksum.  Run e2fsck?");
4637                 return -EFSBADCRC;
4638         }
4639
4640         /* Precompute checksum seed for all metadata */
4641         if (ext4_has_feature_csum_seed(sb))
4642                 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4643         else if (ext4_has_feature_metadata_csum(sb) ||
4644                  ext4_has_feature_ea_inode(sb))
4645                 sbi->s_csum_seed = ext4_chksum(~0, es->s_uuid,
4646                                                sizeof(es->s_uuid));
4647         return 0;
4648 }
4649
4650 static int ext4_check_feature_compatibility(struct super_block *sb,
4651                                             struct ext4_super_block *es,
4652                                             int silent)
4653 {
4654         struct ext4_sb_info *sbi = EXT4_SB(sb);
4655
4656         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4657             (ext4_has_compat_features(sb) ||
4658              ext4_has_ro_compat_features(sb) ||
4659              ext4_has_incompat_features(sb)))
4660                 ext4_msg(sb, KERN_WARNING,
4661                        "feature flags set on rev 0 fs, "
4662                        "running e2fsck is recommended");
4663
4664         if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4665                 set_opt2(sb, HURD_COMPAT);
4666                 if (ext4_has_feature_64bit(sb)) {
4667                         ext4_msg(sb, KERN_ERR,
4668                                  "The Hurd can't support 64-bit file systems");
4669                         return -EINVAL;
4670                 }
4671
4672                 /*
4673                  * ea_inode feature uses l_i_version field which is not
4674                  * available in HURD_COMPAT mode.
4675                  */
4676                 if (ext4_has_feature_ea_inode(sb)) {
4677                         ext4_msg(sb, KERN_ERR,
4678                                  "ea_inode feature is not supported for Hurd");
4679                         return -EINVAL;
4680                 }
4681         }
4682
4683         if (IS_EXT2_SB(sb)) {
4684                 if (ext2_feature_set_ok(sb))
4685                         ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4686                                  "using the ext4 subsystem");
4687                 else {
4688                         /*
4689                          * If we're probing, be silent if this looks like
4690                          * it's actually an ext[34] filesystem.
4691                          */
4692                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4693                                 return -EINVAL;
4694                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4695                                  "to feature incompatibilities");
4696                         return -EINVAL;
4697                 }
4698         }
4699
4700         if (IS_EXT3_SB(sb)) {
4701                 if (ext3_feature_set_ok(sb))
4702                         ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4703                                  "using the ext4 subsystem");
4704                 else {
4705                         /*
4706                          * If we're probing, be silent if this looks like
4707                          * it's actually an ext4 filesystem.
4708                          */
4709                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4710                                 return -EINVAL;
4711                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4712                                  "to feature incompatibilities");
4713                         return -EINVAL;
4714                 }
4715         }
4716
4717         /*
4718          * Check feature flags regardless of the revision level: we
4719          * previously didn't change the revision level when setting the flags,
4720          * so there is a chance incompat flags are set on a rev 0 filesystem.
4721          */
4722         if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4723                 return -EINVAL;
4724
4725         if (sbi->s_daxdev) {
4726                 if (sb->s_blocksize == PAGE_SIZE)
4727                         set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4728                 else
4729                         ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
4730         }
4731
4732         if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4733                 if (ext4_has_feature_inline_data(sb)) {
4734                         ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4735                                         " that may contain inline data");
4736                         return -EINVAL;
4737                 }
4738                 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4739                         ext4_msg(sb, KERN_ERR,
4740                                 "DAX unsupported by block device.");
4741                         return -EINVAL;
4742                 }
4743         }
4744
4745         if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4746                 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4747                          es->s_encryption_level);
4748                 return -EINVAL;
4749         }
4750
4751         return 0;
4752 }
4753
4754 static int ext4_check_geometry(struct super_block *sb,
4755                                struct ext4_super_block *es)
4756 {
4757         struct ext4_sb_info *sbi = EXT4_SB(sb);
4758         __u64 blocks_count;
4759         int err;
4760
4761         if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
4762                 ext4_msg(sb, KERN_ERR,
4763                          "Number of reserved GDT blocks insanely large: %d",
4764                          le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4765                 return -EINVAL;
4766         }
4767         /*
4768          * Test whether we have more sectors than will fit in sector_t,
4769          * and whether the max offset is addressable by the page cache.
4770          */
4771         err = generic_check_addressable(sb->s_blocksize_bits,
4772                                         ext4_blocks_count(es));
4773         if (err) {
4774                 ext4_msg(sb, KERN_ERR, "filesystem"
4775                          " too large to mount safely on this system");
4776                 return err;
4777         }
4778
4779         /* check blocks count against device size */
4780         blocks_count = sb_bdev_nr_blocks(sb);
4781         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4782                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4783                        "exceeds size of device (%llu blocks)",
4784                        ext4_blocks_count(es), blocks_count);
4785                 return -EINVAL;
4786         }
4787
4788         /*
4789          * It makes no sense for the first data block to be beyond the end
4790          * of the filesystem.
4791          */
4792         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4793                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4794                          "block %u is beyond end of filesystem (%llu)",
4795                          le32_to_cpu(es->s_first_data_block),
4796                          ext4_blocks_count(es));
4797                 return -EINVAL;
4798         }
4799         if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4800             (sbi->s_cluster_ratio == 1)) {
4801                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4802                          "block is 0 with a 1k block and cluster size");
4803                 return -EINVAL;
4804         }
4805
4806         blocks_count = (ext4_blocks_count(es) -
4807                         le32_to_cpu(es->s_first_data_block) +
4808                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
4809         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4810         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4811                 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4812                        "(block count %llu, first data block %u, "
4813                        "blocks per group %lu)", blocks_count,
4814                        ext4_blocks_count(es),
4815                        le32_to_cpu(es->s_first_data_block),
4816                        EXT4_BLOCKS_PER_GROUP(sb));
4817                 return -EINVAL;
4818         }
4819         sbi->s_groups_count = blocks_count;
4820         sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4821                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4822         if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4823             le32_to_cpu(es->s_inodes_count)) {
4824                 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4825                          le32_to_cpu(es->s_inodes_count),
4826                          ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4827                 return -EINVAL;
4828         }
4829
4830         return 0;
4831 }
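
/*
 * Worked example (editor's illustration): the group count computed
 * above is ceil((blocks_count - first_data_block) / blocks_per_group).
 * On a hypothetical 4 TiB filesystem with 4 KiB blocks (1073741824
 * blocks, first data block 0, 32768 blocks per group) that yields
 * 1073741824 / 32768 = 32768 groups, far below the
 * 2^32 - EXT4_DESC_PER_BLOCK(sb) limit checked above.
 */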
4832
4833 static int ext4_group_desc_init(struct super_block *sb,
4834                                 struct ext4_super_block *es,
4835                                 ext4_fsblk_t logical_sb_block,
4836                                 ext4_group_t *first_not_zeroed)
4837 {
4838         struct ext4_sb_info *sbi = EXT4_SB(sb);
4839         unsigned int db_count;
4840         ext4_fsblk_t block;
4841         int i;
4842
4843         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4844                    EXT4_DESC_PER_BLOCK(sb);
4845         if (ext4_has_feature_meta_bg(sb)) {
4846                 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4847                         ext4_msg(sb, KERN_WARNING,
4848                                  "first meta block group too large: %u "
4849                                  "(group descriptor block count %u)",
4850                                  le32_to_cpu(es->s_first_meta_bg), db_count);
4851                         return -EINVAL;
4852                 }
4853         }
4854         rcu_assign_pointer(sbi->s_group_desc,
4855                            kvmalloc_array(db_count,
4856                                           sizeof(struct buffer_head *),
4857                                           GFP_KERNEL));
4858         if (sbi->s_group_desc == NULL) {
4859                 ext4_msg(sb, KERN_ERR, "not enough memory");
4860                 return -ENOMEM;
4861         }
4862
4863         bgl_lock_init(sbi->s_blockgroup_lock);
4864
4865         /* Pre-read the descriptors into the buffer cache */
4866         for (i = 0; i < db_count; i++) {
4867                 block = descriptor_loc(sb, logical_sb_block, i);
4868                 ext4_sb_breadahead_unmovable(sb, block);
4869         }
4870
4871         for (i = 0; i < db_count; i++) {
4872                 struct buffer_head *bh;
4873
4874                 block = descriptor_loc(sb, logical_sb_block, i);
4875                 bh = ext4_sb_bread_unmovable(sb, block);
4876                 if (IS_ERR(bh)) {
4877                         ext4_msg(sb, KERN_ERR,
4878                                "can't read group descriptor %d", i);
4879                         sbi->s_gdb_count = i;
4880                         return PTR_ERR(bh);
4881                 }
4882                 rcu_read_lock();
4883                 rcu_dereference(sbi->s_group_desc)[i] = bh;
4884                 rcu_read_unlock();
4885         }
4886         sbi->s_gdb_count = db_count;
4887         if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4888                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4889                 return -EFSCORRUPTED;
4890         }
4891
4892         return 0;
4893 }
4894
4895 static int ext4_load_and_init_journal(struct super_block *sb,
4896                                       struct ext4_super_block *es,
4897                                       struct ext4_fs_context *ctx)
4898 {
4899         struct ext4_sb_info *sbi = EXT4_SB(sb);
4900         int err;
4901
4902         err = ext4_load_journal(sb, es, ctx->journal_devnum);
4903         if (err)
4904                 return err;
4905
4906         if (ext4_has_feature_64bit(sb) &&
4907             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4908                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
4909                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4910                 goto out;
4911         }
4912
4913         if (!set_journal_csum_feature_set(sb)) {
4914                 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4915                          "feature set");
4916                 goto out;
4917         }
4918
4919         if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4920                 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4921                                           JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4922                 ext4_msg(sb, KERN_ERR,
4923                         "Failed to set fast commit journal feature");
4924                 goto out;
4925         }
4926
4927         /* We have now updated the journal if required, so we can
4928          * validate the data journaling mode. */
4929         switch (test_opt(sb, DATA_FLAGS)) {
4930         case 0:
4931                 /* No mode set, assume a default based on the journal
4932                  * capabilities: ORDERED_DATA if the journal can
4933                  * cope, else JOURNAL_DATA
4934                  */
4935                 if (jbd2_journal_check_available_features
4936                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4937                         set_opt(sb, ORDERED_DATA);
4938                         sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4939                 } else {
4940                         set_opt(sb, JOURNAL_DATA);
4941                         sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4942                 }
4943                 break;
4944
4945         case EXT4_MOUNT_ORDERED_DATA:
4946         case EXT4_MOUNT_WRITEBACK_DATA:
4947                 if (!jbd2_journal_check_available_features
4948                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4949                         ext4_msg(sb, KERN_ERR, "Journal does not support "
4950                                "requested data journaling mode");
4951                         goto out;
4952                 }
4953                 break;
4954         default:
4955                 break;
4956         }
4957
4958         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4959             test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4960                 ext4_msg(sb, KERN_ERR, "can't mount with "
4961                         "journal_async_commit in data=ordered mode");
4962                 goto out;
4963         }
4964
4965         set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4966
4967         sbi->s_journal->j_submit_inode_data_buffers =
4968                 ext4_journal_submit_inode_data_buffers;
4969         sbi->s_journal->j_finish_inode_data_buffers =
4970                 ext4_journal_finish_inode_data_buffers;
4971
4972         return 0;
4973
4974 out:
4975         ext4_journal_destroy(sbi, sbi->s_journal);
4976         return -EINVAL;
4977 }
4978
4979 static int ext4_check_journal_data_mode(struct super_block *sb)
4980 {
4981         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4982                 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4983                             "data=journal disables delayed allocation, "
4984                             "dioread_nolock, O_DIRECT and fast_commit support!\n");
4985                 /* can't mount with both data=journal and dioread_nolock. */
4986                 clear_opt(sb, DIOREAD_NOLOCK);
4987                 clear_opt2(sb, JOURNAL_FAST_COMMIT);
4988                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4989                         ext4_msg(sb, KERN_ERR, "can't mount with "
4990                                  "both data=journal and delalloc");
4991                         return -EINVAL;
4992                 }
4993                 if (test_opt(sb, DAX_ALWAYS)) {
4994                         ext4_msg(sb, KERN_ERR, "can't mount with "
4995                                  "both data=journal and dax");
4996                         return -EINVAL;
4997                 }
4998                 if (ext4_has_feature_encrypt(sb)) {
4999                         ext4_msg(sb, KERN_WARNING,
5000                                  "encrypted files will use data=ordered "
5001                                  "instead of data journaling mode");
5002                 }
5003                 if (test_opt(sb, DELALLOC))
5004                         clear_opt(sb, DELALLOC);
5005         } else {
5006                 sb->s_iflags |= SB_I_CGROUPWB;
5007         }
5008
5009         return 0;
5010 }
5011
5012 static const char *ext4_has_journal_option(struct super_block *sb)
5013 {
5014         struct ext4_sb_info *sbi = EXT4_SB(sb);
5015
5016         if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
5017                 return "journal_async_commit";
5018         if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM))
5019                 return "journal_checksum";
5020         if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
5021                 return "commit=";
5022         if (EXT4_MOUNT_DATA_FLAGS &
5023             (sbi->s_mount_opt ^ sbi->s_def_mount_opt))
5024                 return "data=";
5025         if (test_opt(sb, DATA_ERR_ABORT))
5026                 return "data_err=abort";
5027         return NULL;
5028 }
5029
5030 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
5031                            int silent)
5032 {
5033         struct ext4_sb_info *sbi = EXT4_SB(sb);
5034         struct ext4_super_block *es;
5035         ext4_fsblk_t logical_sb_block;
5036         unsigned long offset = 0;
5037         struct buffer_head *bh;
5038         int ret = -EINVAL;
5039         int blocksize;
5040
5041         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
5042         if (!blocksize) {
5043                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
5044                 return -EINVAL;
5045         }
5046
5047         /*
5048          * The ext4 superblock will not be buffer-aligned for block sizes
5049          * other than 1kB.  We need to calculate the offset from the buffer start.
5050          */
5051         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
5052                 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5053                 offset = do_div(logical_sb_block, blocksize);
5054         } else {
5055                 logical_sb_block = sbi->s_sb_block;
5056         }
5057
5058         bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5059         if (IS_ERR(bh)) {
5060                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
5061                 return PTR_ERR(bh);
5062         }
5063         /*
5064          * Note: s_es must be initialized as soon as possible because
5065          *       some ext4 macros depend on its value.
5066          */
5067         es = (struct ext4_super_block *) (bh->b_data + offset);
5068         sbi->s_es = es;
5069         sb->s_magic = le16_to_cpu(es->s_magic);
5070         if (sb->s_magic != EXT4_SUPER_MAGIC) {
5071                 if (!silent)
5072                         ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5073                 goto out;
5074         }
5075
5076         if (le32_to_cpu(es->s_log_block_size) >
5077             (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5078                 ext4_msg(sb, KERN_ERR,
5079                          "Invalid log block size: %u",
5080                          le32_to_cpu(es->s_log_block_size));
5081                 goto out;
5082         }
5083         if (le32_to_cpu(es->s_log_cluster_size) >
5084             (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5085                 ext4_msg(sb, KERN_ERR,
5086                          "Invalid log cluster size: %u",
5087                          le32_to_cpu(es->s_log_cluster_size));
5088                 goto out;
5089         }
5090
5091         blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
5092
5093         /*
5094          * If the default block size is not the same as the real block size,
5095          * we need to re-read the superblock with the correct block size.
5096          */
5097         if (sb->s_blocksize == blocksize) {
5098                 *lsb = logical_sb_block;
5099                 sbi->s_sbh = bh;
5100                 return 0;
5101         }
5102
5103         /*
5104          * bh must be released before kill_bdev(); otherwise
5105          * neither it nor its page will be freed. kill_bdev()
5106          * is called by sb_set_blocksize().
5107          */
5108         brelse(bh);
5109         /* Validate the filesystem blocksize */
5110         if (!sb_set_blocksize(sb, blocksize)) {
5111                 ext4_msg(sb, KERN_ERR, "bad block size %d",
5112                                 blocksize);
5113                 bh = NULL;
5114                 goto out;
5115         }
5116
5117         logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5118         offset = do_div(logical_sb_block, blocksize);
5119         bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5120         if (IS_ERR(bh)) {
5121                 ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5122                 ret = PTR_ERR(bh);
5123                 bh = NULL;
5124                 goto out;
5125         }
5126         es = (struct ext4_super_block *)(bh->b_data + offset);
5127         sbi->s_es = es;
5128         if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5129                 ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5130                 goto out;
5131         }
5132         *lsb = logical_sb_block;
5133         sbi->s_sbh = bh;
5134         return 0;
5135 out:
5136         brelse(bh);
5137         return ret;
5138 }
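
/*
 * Worked example (editor's illustration): the primary superblock lives
 * at 1 KiB block s_sb_block (normally 1), i.e. byte offset 1024.  With
 * a 4 KiB block size, logical_sb_block = 1 * 1024 / 4096 = 0 with
 * offset = 1024, so the superblock is parsed 1024 bytes into the first
 * 4 KiB buffer; with 1 KiB blocks it is simply block 1 at offset 0.
 */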
5139
5140 static int ext4_hash_info_init(struct super_block *sb)
5141 {
5142         struct ext4_sb_info *sbi = EXT4_SB(sb);
5143         struct ext4_super_block *es = sbi->s_es;
5144         unsigned int i;
5145
5146         sbi->s_def_hash_version = es->s_def_hash_version;
5147
5148         if (sbi->s_def_hash_version > DX_HASH_LAST) {
5149                 ext4_msg(sb, KERN_ERR,
5150                          "Invalid default hash set in the superblock");
5151                 return -EINVAL;
5152         } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
5153                 ext4_msg(sb, KERN_ERR,
5154                          "SIPHASH is not a valid default hash value");
5155                 return -EINVAL;
5156         }
5157
5158         for (i = 0; i < 4; i++)
5159                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5160
5161         if (ext4_has_feature_dir_index(sb)) {
5162                 i = le32_to_cpu(es->s_flags);
5163                 if (i & EXT2_FLAGS_UNSIGNED_HASH)
5164                         sbi->s_hash_unsigned = 3;
5165                 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
5166 #ifdef __CHAR_UNSIGNED__
5167                         if (!sb_rdonly(sb))
5168                                 es->s_flags |=
5169                                         cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5170                         sbi->s_hash_unsigned = 3;
5171 #else
5172                         if (!sb_rdonly(sb))
5173                                 es->s_flags |=
5174                                         cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5175 #endif
5176                 }
5177         }
5178         return 0;
5179 }
5180
5181 static int ext4_block_group_meta_init(struct super_block *sb, int silent)
5182 {
5183         struct ext4_sb_info *sbi = EXT4_SB(sb);
5184         struct ext4_super_block *es = sbi->s_es;
5185         int has_huge_files;
5186
5187         has_huge_files = ext4_has_feature_huge_file(sb);
5188         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5189                                                       has_huge_files);
5190         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5191
5192         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5193         if (ext4_has_feature_64bit(sb)) {
5194                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5195                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5196                     !is_power_of_2(sbi->s_desc_size)) {
5197                         ext4_msg(sb, KERN_ERR,
5198                                "unsupported descriptor size %lu",
5199                                sbi->s_desc_size);
5200                         return -EINVAL;
5201                 }
5202         } else
5203                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5204
5205         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5206         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5207
5208         sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
5209         if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5210                 if (!silent)
5211                         ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5212                 return -EINVAL;
5213         }
5214         if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5215             sbi->s_inodes_per_group > sb->s_blocksize * 8) {
5216                 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
5217                          sbi->s_inodes_per_group);
5218                 return -EINVAL;
5219         }
5220         sbi->s_itb_per_group = sbi->s_inodes_per_group /
5221                                         sbi->s_inodes_per_block;
5222         sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5223         sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5224         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5225         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5226
5227         return 0;
5228 }
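
/*
 * Worked example (editor's illustration with typical mkfs defaults):
 * with 4 KiB blocks and 256-byte inodes, s_inodes_per_block is
 * 4096 / 256 = 16; with 8192 inodes per group, s_itb_per_group is
 * 8192 / 16 = 512 inode table blocks, and the "inodes per group"
 * check above caps the value at 8 * 4096 = 32768.
 */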
5229
5230 /*
5231  * It's hard to get stripe-aligned blocks if the stripe is not aligned with
5232  * the cluster size, so just disable striping and alert the user; this
5233  * simplifies the code and avoids stripe-aligned allocations that will rarely succeed.
5234  */
5235 static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
5236 {
5237         struct ext4_sb_info *sbi = EXT4_SB(sb);
5238         return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
5239                 stripe % sbi->s_cluster_ratio != 0);
5240 }
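
/*
 * Example (editor's illustration): with a bigalloc cluster ratio of
 * 16 blocks per cluster, a stripe of 24 blocks is incompatible
 * (24 % 16 != 0) and gets disabled below, while a stripe of 32 blocks
 * (32 % 16 == 0) is kept.
 */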
5241
5242 static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
5243 {
5244         struct ext4_super_block *es = NULL;
5245         struct ext4_sb_info *sbi = EXT4_SB(sb);
5246         ext4_fsblk_t logical_sb_block;
5247         struct inode *root;
5248         int needs_recovery;
5249         int err;
5250         ext4_group_t first_not_zeroed;
5251         struct ext4_fs_context *ctx = fc->fs_private;
5252         int silent = fc->sb_flags & SB_SILENT;
5253
5254         /* Set defaults for the variables that will be set during parsing */
5255         if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
5256                 ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
5257
5258         sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
5259         sbi->s_sectors_written_start =
5260                 part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
5261
5262         err = ext4_load_super(sb, &logical_sb_block, silent);
5263         if (err)
5264                 goto out_fail;
5265
5266         es = sbi->s_es;
5267         sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
5268
5269         err = ext4_init_metadata_csum(sb, es);
5270         if (err)
5271                 goto failed_mount;
5272
5273         ext4_set_def_opts(sb, es);
5274
5275         sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
5276         sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
5277         sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
5278         sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
5279         sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
5280         sbi->s_sb_update_kb = EXT4_DEF_SB_UPDATE_INTERVAL_KB;
5281         sbi->s_sb_update_sec = EXT4_DEF_SB_UPDATE_INTERVAL_SEC;
5282
5283         /*
5284          * Set the default s_li_wait_mult for lazyinit, for the case where
5285          * no mount option is specified.
5286          */
5287         sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
5288
5289         err = ext4_inode_info_init(sb, es);
5290         if (err)
5291                 goto failed_mount;
5292
5293         err = parse_apply_sb_mount_options(sb, ctx);
5294         if (err < 0)
5295                 goto failed_mount;
5296
5297         sbi->s_def_mount_opt = sbi->s_mount_opt;
5298         sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
5299
5300         err = ext4_check_opt_consistency(fc, sb);
5301         if (err < 0)
5302                 goto failed_mount;
5303
5304         ext4_apply_options(fc, sb);
5305
5306         err = ext4_encoding_init(sb, es);
5307         if (err)
5308                 goto failed_mount;
5309
5310         err = ext4_check_journal_data_mode(sb);
5311         if (err)
5312                 goto failed_mount;
5313
5314         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5315                 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5316
5317         /* i_version is always enabled now */
5318         sb->s_flags |= SB_I_VERSION;
5319
5320         /* HSM events are allowed by default. */
5321         sb->s_iflags |= SB_I_ALLOW_HSM;
5322
5323         err = ext4_check_feature_compatibility(sb, es, silent);
5324         if (err)
5325                 goto failed_mount;
5326
5327         err = ext4_block_group_meta_init(sb, silent);
5328         if (err)
5329                 goto failed_mount;
5330
5331         err = ext4_hash_info_init(sb);
5332         if (err)
5333                 goto failed_mount;
5334
5335         err = ext4_handle_clustersize(sb);
5336         if (err)
5337                 goto failed_mount;
5338
5339         err = ext4_check_geometry(sb, es);
5340         if (err)
5341                 goto failed_mount;
5342
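        /*
         * Error handling infrastructure: the daily error report timer, the
         * lock protecting the in-memory error information, and the work item
         * used to update the superblock from contexts that cannot write it
         * directly.
         */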
5343         timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
5344         spin_lock_init(&sbi->s_error_lock);
5345         INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
5346
5347         err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
5348         if (err)
5349                 goto failed_mount3;
5350
5351         err = ext4_es_register_shrinker(sbi);
5352         if (err)
5353                 goto failed_mount3;
5354
5355         sbi->s_stripe = ext4_get_stripe_size(sbi);
5356         if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
5357                 ext4_msg(sb, KERN_WARNING,
5358                          "stripe (%lu) is not aligned with cluster size (%u), "
5359                          "stripe is disabled",
5360                          sbi->s_stripe, sbi->s_cluster_ratio);
5361                 sbi->s_stripe = 0;
5362         }
5363         sbi->s_extent_max_zeroout_kb = 32;
5364
5365         /*
5366          * Set up enough state so that we can read an inode.
5367          */
5368         sb->s_op = &ext4_sops;
5369         sb->s_export_op = &ext4_export_ops;
5370         sb->s_xattr = ext4_xattr_handlers;
5371 #ifdef CONFIG_FS_ENCRYPTION
5372         sb->s_cop = &ext4_cryptops;
5373 #endif
5374 #ifdef CONFIG_FS_VERITY
5375         sb->s_vop = &ext4_verityops;
5376 #endif
5377 #ifdef CONFIG_QUOTA
5378         sb->dq_op = &ext4_quota_operations;
5379         if (ext4_has_feature_quota(sb))
5380                 sb->s_qcop = &dquot_quotactl_sysfile_ops;
5381         else
5382                 sb->s_qcop = &ext4_qctl_operations;
5383         sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5384 #endif
5385         super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
5386         super_set_sysfs_name_bdev(sb);
5387
5388         INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5389         mutex_init(&sbi->s_orphan_lock);
5390
5391         spin_lock_init(&sbi->s_bdev_wb_lock);
5392
5393         ext4_atomic_write_init(sb);
5394         ext4_fast_commit_init(sb);
5395
5396         sb->s_root = NULL;
5397
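        /*
         * Recovery is needed if there are unprocessed orphan inodes (in the
         * legacy orphan list or the orphan file) or if the journal was left
         * dirty by an unclean shutdown.
         */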
5398         needs_recovery = (es->s_last_orphan != 0 ||
5399                           ext4_has_feature_orphan_present(sb) ||
5400                           ext4_has_feature_journal_needs_recovery(sb));
5401
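        /*
         * Multi-mount protection guards against this filesystem being
         * mounted read-write from more than one node at once; engage it
         * before we start touching the journal or other metadata.
         */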
5402         if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
5403                 err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
5404                 if (err)
5405                         goto failed_mount3a;
5406         }
5407
5408         err = -EINVAL;
5409         /*
5410          * The first inode we look at is the journal inode.  Don't try
5411          * root first: it may be modified in the journal!
5412          */
5413         if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5414                 err = ext4_load_and_init_journal(sb, es, ctx);
5415                 if (err)
5416                         goto failed_mount3a;
5417         } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5418                    ext4_has_feature_journal_needs_recovery(sb)) {
5419                 ext4_msg(sb, KERN_ERR, "required journal recovery "
5420                        "suppressed and not mounted read-only");
5421                 goto failed_mount3a;
5422         } else {
5423                 const char *journal_option;
5424
5425                 /* No-journal mode: all journal mount options are invalid */
5426                 journal_option = ext4_has_journal_option(sb);
5427                 if (journal_option != NULL) {
5428                         ext4_msg(sb, KERN_ERR,
5429                                  "can't mount with %s, fs mounted w/o journal",
5430                                  journal_option);
5431                         goto failed_mount3a;
5432                 }
5433
5434                 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5435                 clear_opt(sb, JOURNAL_CHECKSUM);
5436                 clear_opt(sb, DATA_FLAGS);
5437                 clear_opt2(sb, JOURNAL_FAST_COMMIT);
5438                 sbi->s_journal = NULL;
5439                 needs_recovery = 0;
5440         }
5441
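        /*
         * The mbcache lets identical extended attribute blocks (and, with
         * the ea_inode feature, xattr values stored in inodes) be shared
         * between inodes, unless the no_mbcache mount option disables it.
         */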
5442         if (!test_opt(sb, NO_MBCACHE)) {
5443                 sbi->s_ea_block_cache = ext4_xattr_create_cache();
5444                 if (!sbi->s_ea_block_cache) {
5445                         ext4_msg(sb, KERN_ERR,
5446                                  "Failed to create ea_block_cache");
5447                         err = -EINVAL;
5448                         goto failed_mount_wq;
5449                 }
5450
5451                 if (ext4_has_feature_ea_inode(sb)) {
5452                         sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5453                         if (!sbi->s_ea_inode_cache) {
5454                                 ext4_msg(sb, KERN_ERR,
5455                                          "Failed to create ea_inode_cache");
5456                                 err = -EINVAL;
5457                                 goto failed_mount_wq;
5458                         }
5459                 }
5460         }
5461
5462         /*
5463          * Get the # of file system overhead blocks from the
5464          * superblock if present.
5465          */
5466         sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5467         /* ignore the precalculated value if it is ridiculous */
5468         if (sbi->s_overhead > ext4_blocks_count(es))
5469                 sbi->s_overhead = 0;
5470         /*
5471          * If the bigalloc feature is not enabled, recalculating the
5472          * overhead doesn't take long, so we might as well just redo
5473          * it to make sure we are using the correct value.
5474          */
5475         if (!ext4_has_feature_bigalloc(sb))
5476                 sbi->s_overhead = 0;
5477         if (sbi->s_overhead == 0) {
5478                 err = ext4_calculate_overhead(sb);
5479                 if (err)
5480                         goto failed_mount_wq;
5481         }
5482
5483         /*
5484          * The maximum number of concurrent works can be high and
5485          * concurrency isn't really necessary.  Limit it to 1.
5486          */
5487         EXT4_SB(sb)->rsv_conversion_wq =
5488                 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5489         if (!EXT4_SB(sb)->rsv_conversion_wq) {
5490                 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5491                 err = -ENOMEM;
5492                 goto failed_mount4;
5493         }
5494
5495         /*
5496          * The jbd2_journal_load will have done any necessary log recovery,
5497          * so we can safely mount the rest of the filesystem now.
5498          */
5499
5500         root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5501         if (IS_ERR(root)) {
5502                 ext4_msg(sb, KERN_ERR, "get root inode failed");
5503                 err = PTR_ERR(root);
5504                 root = NULL;
5505                 goto failed_mount4;
5506         }
5507         if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5508                 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5509                 iput(root);
5510                 err = -EFSCORRUPTED;
5511                 goto failed_mount4;
5512         }
5513
5514         generic_set_sb_d_ops(sb);
5515         sb->s_root = d_make_root(root);
5516         if (!sb->s_root) {
5517                 ext4_msg(sb, KERN_ERR, "get root dentry failed");
5518                 err = -ENOMEM;
5519                 goto failed_mount4;
5520         }
5521
5522         err = ext4_setup_super(sb, es, sb_rdonly(sb));
5523         if (err == -EROFS) {
5524                 sb->s_flags |= SB_RDONLY;
5525         } else if (err)
5526                 goto failed_mount4a;
5527
5528         ext4_set_resv_clusters(sb);
5529
5530         if (test_opt(sb, BLOCK_VALIDITY)) {
5531                 err = ext4_setup_system_zone(sb);
5532                 if (err) {
5533                         ext4_msg(sb, KERN_ERR, "failed to initialize system "
5534                                  "zone (%d)", err);
5535                         goto failed_mount4a;
5536                 }
5537         }
5538         ext4_fc_replay_cleanup(sb);
5539
5540         ext4_ext_init(sb);
5541
5542         /*
5543          * Enable optimize_scan if number of groups is > threshold. This can be
5544          * turned off by passing "mb_optimize_scan=0". This can also be
5545          * turned on forcefully by passing "mb_optimize_scan=1".
5546          */
5547         if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5548                 if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5549                         set_opt2(sb, MB_OPTIMIZE_SCAN);
5550                 else
5551                         clear_opt2(sb, MB_OPTIMIZE_SCAN);
5552         }
5553
5554         err = ext4_mb_init(sb);
5555         if (err) {
5556                 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5557                          err);
5558                 goto failed_mount5;
5559         }
5560
5561         /*
5562          * We can only set up the journal commit callback once
5563          * mballoc is initialized
5564          */
5565         if (sbi->s_journal)
5566                 sbi->s_journal->j_commit_callback =
5567                         ext4_journal_commit_callback;
5568
5569         err = ext4_percpu_param_init(sbi);
5570         if (err)
5571                 goto failed_mount6;
5572
5573         if (ext4_has_feature_flex_bg(sb))
5574                 if (!ext4_fill_flex_info(sb)) {
5575                         ext4_msg(sb, KERN_ERR,
5576                                "unable to initialize "
5577                                "flex_bg meta info!");
5578                         err = -ENOMEM;
5579                         goto failed_mount6;
5580                 }
5581
5582         err = ext4_register_li_request(sb, first_not_zeroed);
5583         if (err)
5584                 goto failed_mount6;
5585
5586         err = ext4_init_orphan_info(sb);
5587         if (err)
5588                 goto failed_mount7;
5589 #ifdef CONFIG_QUOTA
5590         /* Enable quota usage during mount. */
5591         if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5592                 err = ext4_enable_quotas(sb);
5593                 if (err)
5594                         goto failed_mount8;
5595         }
5596 #endif  /* CONFIG_QUOTA */
5597
5598         /*
5599          * Save the original bdev mapping's wb_err value so it can later
5600          * be used to detect metadata async write errors.
5601          */
5602         errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
5603                                  &sbi->s_bdev_wb_err);
5604         EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5605         ext4_orphan_cleanup(sb, es);
5606         EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5607         /*
5608          * Update the checksum after updating free space/inode counters and
5609          * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5610          * checksum in the buffer cache until it is written out, and
5611          * e2fsprogs programs trying to open a file system immediately
5612          * after it is mounted can fail.
5613          */
5614         ext4_superblock_csum_set(sb);
5615         if (needs_recovery) {
5616                 ext4_msg(sb, KERN_INFO, "recovery complete");
5617                 err = ext4_mark_recovery_complete(sb, es);
5618                 if (err)
5619                         goto failed_mount9;
5620         }
5621
5622         if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) {
5623                 ext4_msg(sb, KERN_WARNING,
5624                          "mounting with \"discard\" option, but the device does not support discard");
5625                 clear_opt(sb, DISCARD);
5626         }
5627
5628         if (es->s_error_count)
5629                 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5630
5631         /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5632         ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5633         ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5634         ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5635         atomic_set(&sbi->s_warning_count, 0);
5636         atomic_set(&sbi->s_msg_count, 0);
5637
5638         /* Register sysfs after all initializations are complete. */
5639         err = ext4_register_sysfs(sb);
5640         if (err)
5641                 goto failed_mount9;
5642
5643         return 0;
5644
5645 failed_mount9:
5646         ext4_quotas_off(sb, EXT4_MAXQUOTAS);
5647 failed_mount8: __maybe_unused
5648         ext4_release_orphan_info(sb);
5649 failed_mount7:
5650         ext4_unregister_li_request(sb);
5651 failed_mount6:
5652         ext4_mb_release(sb);
5653         ext4_flex_groups_free(sbi);
5654         ext4_percpu_param_destroy(sbi);
5655 failed_mount5:
5656         ext4_ext_release(sb);
5657         ext4_release_system_zone(sb);
5658 failed_mount4a:
5659         dput(sb->s_root);
5660         sb->s_root = NULL;
5661 failed_mount4:
5662         ext4_msg(sb, KERN_ERR, "mount failed");
5663         if (EXT4_SB(sb)->rsv_conversion_wq)
5664                 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5665 failed_mount_wq:
5666         ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5667         sbi->s_ea_inode_cache = NULL;
5668
5669         ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5670         sbi->s_ea_block_cache = NULL;
5671
5672         if (sbi->s_journal) {
5673                 ext4_journal_destroy(sbi, sbi->s_journal);
5674         }
5675 failed_mount3a:
5676         ext4_es_unregister_shrinker(sbi);
5677 failed_mount3:
5678         /* flush s_sb_upd_work before sbi destroy */
5679         flush_work(&sbi->s_sb_upd_work);
5680         ext4_stop_mmpd(sbi);
5681         timer_delete_sync(&sbi->s_err_report);
5682         ext4_group_desc_free(sbi);
5683 failed_mount:
5684 #if IS_ENABLED(CONFIG_UNICODE)
5685         utf8_unload(sb->s_encoding);
5686 #endif
5687
5688 #ifdef CONFIG_QUOTA
5689         for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
5690                 kfree(get_qf_name(sb, sbi, i));
5691 #endif
5692         fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5693         brelse(sbi->s_sbh);
5694         if (sbi->s_journal_bdev_file) {
5695                 invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
5696                 bdev_fput(sbi->s_journal_bdev_file);
5697         }
5698 out_fail:
5699         invalidate_bdev(sb->s_bdev);
5700         sb->s_fs_info = NULL;
5701         return err;
5702 }
5703
5704 static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5705 {
5706         struct ext4_fs_context *ctx = fc->fs_private;
5707         struct ext4_sb_info *sbi;
5708         const char *descr;
5709         int ret;
5710
5711         sbi = ext4_alloc_sbi(sb);
5712         if (!sbi)
5713                 return -ENOMEM;
5714
5715         fc->s_fs_info = sbi;
5716
5717         /* Cleanup superblock name */
5718         strreplace(sb->s_id, '/', '!');
5719
5720         sbi->s_sb_block = 1;    /* Default super block location */
5721         if (ctx->spec & EXT4_SPEC_s_sb_block)
5722                 sbi->s_sb_block = ctx->s_sb_block;
5723
5724         ret = __ext4_fill_super(fc, sb);
5725         if (ret < 0)
5726                 goto free_sbi;
5727
5728         if (sbi->s_journal) {
5729                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5730                         descr = " journalled data mode";
5731                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5732                         descr = " ordered data mode";
5733                 else
5734                         descr = " writeback data mode";
5735         } else
5736                 descr = "out journal";
5737
5738         if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5739                 ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
5740                          "Quota mode: %s.", &sb->s_uuid,
5741                          sb_rdonly(sb) ? "ro" : "r/w", descr,
5742                          ext4_quota_mode(sb));
5743
5744         /* Update the s_overhead_clusters if necessary */
5745         ext4_update_overhead(sb, false);
5746         return 0;
5747
5748 free_sbi:
5749         ext4_free_sbi(sbi);
5750         fc->s_fs_info = NULL;
5751         return ret;
5752 }
5753
5754 static int ext4_get_tree(struct fs_context *fc)
5755 {
5756         return get_tree_bdev(fc, ext4_fill_super);
5757 }
5758
5759 /*
5760  * Set up any per-fs journal parameters now.  We'll do this both on
5761  * initial mount, once the journal has been initialised but before we've
5762  * done any recovery; and again on any subsequent remount.
5763  */
5764 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5765 {
5766         struct ext4_sb_info *sbi = EXT4_SB(sb);
5767
5768         journal->j_commit_interval = sbi->s_commit_interval;
5769         journal->j_min_batch_time = sbi->s_min_batch_time;
5770         journal->j_max_batch_time = sbi->s_max_batch_time;
5771         ext4_fc_init(sb, journal);
5772
5773         write_lock(&journal->j_state_lock);
5774         if (test_opt(sb, BARRIER))
5775                 journal->j_flags |= JBD2_BARRIER;
5776         else
5777                 journal->j_flags &= ~JBD2_BARRIER;
5778         /*
5779          * Always enable the journal cycle record option, so the journal
5780          * keeps recording transactions continuously across mounts.
5781          */
5782         journal->j_flags |= JBD2_CYCLE_RECORD;
5783         write_unlock(&journal->j_state_lock);
5784 }
5785
5786 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5787                                              unsigned int journal_inum)
5788 {
5789         struct inode *journal_inode;
5790
5791         /*
5792          * Test for the existence of a valid inode on disk.  Bad things
5793          * happen if we iget() an unused inode, as the subsequent iput()
5794          * will try to delete it.
5795          */
5796         journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5797         if (IS_ERR(journal_inode)) {
5798                 ext4_msg(sb, KERN_ERR, "no journal found");
5799                 return ERR_CAST(journal_inode);
5800         }
5801         if (!journal_inode->i_nlink) {
5802                 make_bad_inode(journal_inode);
5803                 iput(journal_inode);
5804                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5805                 return ERR_PTR(-EFSCORRUPTED);
5806         }
5807         if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5808                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
5809                 iput(journal_inode);
5810                 return ERR_PTR(-EFSCORRUPTED);
5811         }
5812
5813         ext4_debug("Journal inode found at %p: %lld bytes\n",
5814                   journal_inode, journal_inode->i_size);
5815         return journal_inode;
5816 }
5817
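/*
 * Map a logical block number in the journal inode to its physical block
 * number.  jbd2 calls this via journal->j_bmap; a mapping failure is fatal
 * for the journal, so abort it rather than risk using a bad block.
 */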
5818 static int ext4_journal_bmap(journal_t *journal, sector_t *block)
5819 {
5820         struct ext4_map_blocks map;
5821         int ret;
5822
5823         if (journal->j_inode == NULL)
5824                 return 0;
5825
5826         map.m_lblk = *block;
5827         map.m_len = 1;
5828         ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
5829         if (ret <= 0) {
5830                 ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
5831                          "journal bmap failed: block %llu ret %d",
5832                          *block, ret);
5833                 jbd2_journal_abort(journal, ret ? ret : -EIO);
5834                 return ret;
5835         }
5836         *block = map.m_pblk;
5837         return 0;
5838 }
5839
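/*
 * Open the filesystem's internal journal inode and wrap it in a jbd2
 * journal, wiring up the bmap callback used to resolve journal blocks.
 */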
5840 static journal_t *ext4_open_inode_journal(struct super_block *sb,
5841                                           unsigned int journal_inum)
5842 {
5843         struct inode *journal_inode;
5844         journal_t *journal;
5845
5846         journal_inode = ext4_get_journal_inode(sb, journal_inum);
5847         if (IS_ERR(journal_inode))
5848                 return ERR_CAST(journal_inode);
5849
5850         journal = jbd2_journal_init_inode(journal_inode);
5851         if (IS_ERR(journal)) {
5852                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5853                 iput(journal_inode);
5854                 return ERR_CAST(journal);
5855         }
5856         journal->j_private = sb;
5857         journal->j_bmap = ext4_journal_bmap;
5858         ext4_init_journal_params(sb, journal);
5859         return journal;
5860 }
5861
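/*
 * Open the external journal device and sanity-check its superblock: the
 * ext4 magic, the JOURNAL_DEV incompat feature, the checksum (when
 * metadata_csum is set), and the journal UUID recorded in the filesystem.
 * On success, report the journal's start block and length to the caller.
 */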
5862 static struct file *ext4_get_journal_blkdev(struct super_block *sb,
5863                                         dev_t j_dev, ext4_fsblk_t *j_start,
5864                                         ext4_fsblk_t *j_len)
5865 {
5866         struct buffer_head *bh;
5867         struct block_device *bdev;
5868         struct file *bdev_file;
5869         int hblock, blocksize;
5870         ext4_fsblk_t sb_block;
5871         unsigned long offset;
5872         struct ext4_super_block *es;
5873         int errno;
5874
5875         bdev_file = bdev_file_open_by_dev(j_dev,
5876                 BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
5877                 sb, &fs_holder_ops);
5878         if (IS_ERR(bdev_file)) {
5879                 ext4_msg(sb, KERN_ERR,
5880                          "failed to open journal device unknown-block(%u,%u) %ld",
5881                          MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
5882                 return bdev_file;
5883         }
5884
5885         bdev = file_bdev(bdev_file);
5886         blocksize = sb->s_blocksize;
5887         hblock = bdev_logical_block_size(bdev);
5888         if (blocksize < hblock) {
5889                 ext4_msg(sb, KERN_ERR,
5890                         "blocksize too small for journal device");
5891                 errno = -EINVAL;
5892                 goto out_bdev;
5893         }
5894
5895         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5896         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5897         set_blocksize(bdev_file, blocksize);
5898         bh = __bread(bdev, sb_block, blocksize);
5899         if (!bh) {
5900                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5901                        "external journal");
5902                 errno = -EINVAL;
5903                 goto out_bdev;
5904         }
5905
5906         es = (struct ext4_super_block *) (bh->b_data + offset);
5907         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5908             !(le32_to_cpu(es->s_feature_incompat) &
5909               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5910                 ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
5911                 errno = -EFSCORRUPTED;
5912                 goto out_bh;
5913         }
5914
5915         if ((le32_to_cpu(es->s_feature_ro_compat) &
5916              EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5917             es->s_checksum != ext4_superblock_csum(es)) {
5918                 ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
5919                 errno = -EFSCORRUPTED;
5920                 goto out_bh;
5921         }
5922
5923         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5924                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5925                 errno = -EFSCORRUPTED;
5926                 goto out_bh;
5927         }
5928
5929         *j_start = sb_block + 1;
5930         *j_len = ext4_blocks_count(es);
5931         brelse(bh);
5932         return bdev_file;
5933
5934 out_bh:
5935         brelse(bh);
5936 out_bdev:
5937         bdev_fput(bdev_file);
5938         return ERR_PTR(errno);
5939 }
5940
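/*
 * Set up a jbd2 journal on an external block device.  Only a single user
 * of an external journal is supported, so anything else is rejected.
 */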
5941 static journal_t *ext4_open_dev_journal(struct super_block *sb,
5942                                         dev_t j_dev)
5943 {
5944         journal_t *journal;
5945         ext4_fsblk_t j_start;
5946         ext4_fsblk_t j_len;
5947         struct file *bdev_file;
5948         int errno = 0;
5949
5950         bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
5951         if (IS_ERR(bdev_file))
5952                 return ERR_CAST(bdev_file);
5953
5954         journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
5955                                         j_len, sb->s_blocksize);
5956         if (IS_ERR(journal)) {
5957                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
5958                 errno = PTR_ERR(journal);
5959                 goto out_bdev;
5960         }
5961         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5962                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
5963                                         "user (unsupported) - %d",
5964                         be32_to_cpu(journal->j_superblock->s_nr_users));
5965                 errno = -EINVAL;
5966                 goto out_journal;
5967         }
5968         journal->j_private = sb;
5969         EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
5970         ext4_init_journal_params(sb, journal);
5971         return journal;
5972
5973 out_journal:
5974         ext4_journal_destroy(EXT4_SB(sb), journal);
5975 out_bdev:
5976         bdev_fput(bdev_file);
5977         return ERR_PTR(errno);
5978 }
5979
5980 static int ext4_load_journal(struct super_block *sb,
5981                              struct ext4_super_block *es,
5982                              unsigned long journal_devnum)
5983 {
5984         journal_t *journal;
5985         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5986         dev_t journal_dev;
5987         int err = 0;
5988         int really_read_only;
5989         int journal_dev_ro;
5990
5991         if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5992                 return -EFSCORRUPTED;
5993
5994         if (journal_devnum &&
5995             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5996                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5997                         "numbers have changed");
5998                 journal_dev = new_decode_dev(journal_devnum);
5999         } else
6000                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
6001
6002         if (journal_inum && journal_dev) {
6003                 ext4_msg(sb, KERN_ERR,
6004                          "filesystem has both journal inode and journal device!");
6005                 return -EINVAL;
6006         }
6007
6008         if (journal_inum) {
6009                 journal = ext4_open_inode_journal(sb, journal_inum);
6010                 if (IS_ERR(journal))
6011                         return PTR_ERR(journal);
6012         } else {
6013                 journal = ext4_open_dev_journal(sb, journal_dev);
6014                 if (IS_ERR(journal))
6015                         return PTR_ERR(journal);
6016         }
6017
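        /*
         * The journal may live on a separate device, so the filesystem is
         * effectively read-only if either the fs device or the journal
         * device is read-only.
         */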
6018         journal_dev_ro = bdev_read_only(journal->j_dev);
6019         really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
6020
6021         if (journal_dev_ro && !sb_rdonly(sb)) {
6022                 ext4_msg(sb, KERN_ERR,
6023                          "journal device read-only, try mounting with '-o ro'");
6024                 err = -EROFS;
6025                 goto err_out;
6026         }
6027
6028         /*
6029          * Are we loading a blank journal or performing recovery after a
6030          * crash?  For recovery, we need to check in advance whether we
6031          * can get read-write access to the device.
6032          */
6033         if (ext4_has_feature_journal_needs_recovery(sb)) {
6034                 if (sb_rdonly(sb)) {
6035                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
6036                                         "required on readonly filesystem");
6037                         if (really_read_only) {
6038                                 ext4_msg(sb, KERN_ERR, "write access "
6039                                         "unavailable, cannot proceed "
6040                                         "(try mounting with noload)");
6041                                 err = -EROFS;
6042                                 goto err_out;
6043                         }
6044                         ext4_msg(sb, KERN_INFO, "write access will "
6045                                "be enabled during recovery");
6046                 }
6047         }
6048
6049         if (!(journal->j_flags & JBD2_BARRIER))
6050                 ext4_msg(sb, KERN_INFO, "barriers disabled");
6051
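        /*
         * If no recovery is pending, the journal holds nothing of value;
         * wipe any stale records (when writable) instead of replaying them.
         */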
6052         if (!ext4_has_feature_journal_needs_recovery(sb))
6053                 err = jbd2_journal_wipe(journal, !really_read_only);
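        /*
         * Journal replay may rewrite the superblock and clobber the error
         * information recorded in it, so save that region before the load
         * and restore it afterwards if it changed.
         */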
6054         if (!err) {
6055                 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
6056                 __le16 orig_state;
6057                 bool changed = false;
6058
6059                 if (save)
6060                         memcpy(save, ((char *) es) +
6061                                EXT4_S_ERR_START, EXT4_S_ERR_LEN);
6062                 err = jbd2_journal_load(journal);
6063                 if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
6064                                    save, EXT4_S_ERR_LEN)) {
6065                         memcpy(((char *) es) + EXT4_S_ERR_START,
6066                                save, EXT4_S_ERR_LEN);
6067                         changed = true;
6068                 }
6069                 kfree(save);
6070                 orig_state = es->s_state;
6071                 es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
6072                                            EXT4_ERROR_FS);
6073                 if (orig_state != es->s_state)
6074                         changed = true;
6075                 /* Write out restored error information to the superblock */
6076                 if (changed && !really_read_only) {
6077                         int err2;
6078                         err2 = ext4_commit_super(sb);
6079                         err = err ? : err2;
6080                 }
6081         }
6082
6083         if (err) {
6084                 ext4_msg(sb, KERN_ERR, "error loading journal");
6085                 goto err_out;
6086         }
6087
6088         EXT4_SB(sb)->s_journal = journal;
6089         err = ext4_clear_journal_err(sb, es);
6090         if (err) {
6091                 ext4_journal_destroy(EXT4_SB(sb), journal);
6092                 return err;
6093         }
6094
6095         if (!really_read_only && journal_devnum &&
6096             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
6097                 es->s_journal_dev = cpu_to_le32(journal_devnum);
6098                 ext4_commit_super(sb);
6099         }
6100         if (!really_read_only && journal_inum &&
6101             journal_inum != le32_to_cpu(es->s_journal_inum)) {
6102                 es->s_journal_inum = cpu_to_le32(journal_inum);
6103                 ext4_commit_super(sb);
6104         }
6105
6106         return 0;
6107
6108 err_out:
6109         ext4_journal_destroy(EXT4_SB(sb), journal);
6110         return err;
6111 }
6112
6113 /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
6114 static void ext4_update_super(struct super_block *sb)
6115 {
6116         struct ext4_sb_info *sbi = EXT4_SB(sb);
6117         struct ext4_super_block *es = sbi->s_es;
6118         struct buffer_head *sbh = sbi->s_sbh;
6119
6120         lock_buffer(sbh);
6121         /*
6122          * If the file system is mounted read-only, don't update the
6123          * superblock write time.  This avoids updating the superblock
6124          * write time when we are mounting the root file system
6125          * read/only but we need to replay the journal; at that point,
6126          * read-only but we need to replay the journal; at that point,
6127          * tick in localtime for Windows bug-for-bug compatibility,
6128          * the clock is set in the future, and this will cause e2fsck
6129          * to complain and force a full file system check.
6130          */
6131         if (!sb_rdonly(sb))
6132                 ext4_update_tstamp(es, s_wtime);
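        /*
         * Lifetime write statistics: part_stat sectors are 512 bytes, so
         * halve the sector delta to convert it to kilobytes.
         */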
6133         es->s_kbytes_written =
6134                 cpu_to_le64(sbi->s_kbytes_written +
6135                     ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6136                       sbi->s_sectors_written_start) >> 1));
6137         if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6138                 ext4_free_blocks_count_set(es,
6139                         EXT4_C2B(sbi, percpu_counter_sum_positive(
6140                                 &sbi->s_freeclusters_counter)));
6141         if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6142                 es->s_free_inodes_count =
6143                         cpu_to_le32(percpu_counter_sum_positive(
6144                                 &sbi->s_freeinodes_counter));
6145         /* Copy error information to the on-disk superblock */
6146         spin_lock(&sbi->s_error_lock);
6147         if (sbi->s_add_error_count > 0) {
6148                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6149                 if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6150                         __ext4_update_tstamp(&es->s_first_error_time,
6151                                              &es->s_first_error_time_hi,
6152                                              sbi->s_first_error_time);
6153                         strtomem_pad(es->s_first_error_func,
6154                                      sbi->s_first_error_func, 0);
6155                         es->s_first_error_line =
6156                                 cpu_to_le32(sbi->s_first_error_line);
6157                         es->s_first_error_ino =
6158                                 cpu_to_le32(sbi->s_first_error_ino);
6159                         es->s_first_error_block =
6160                                 cpu_to_le64(sbi->s_first_error_block);
6161                         es->s_first_error_errcode =
6162                                 ext4_errno_to_code(sbi->s_first_error_code);
6163                 }
6164                 __ext4_update_tstamp(&es->s_last_error_time,
6165                                      &es->s_last_error_time_hi,
6166                                      sbi->s_last_error_time);
6167                 strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
6168                 es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6169                 es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6170                 es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6171                 es->s_last_error_errcode =
6172                                 ext4_errno_to_code(sbi->s_last_error_code);
6173                 /*
6174                  * Start the daily error reporting function if it hasn't been
6175                  * started already
6176                  */
6177                 if (!es->s_error_count)
6178                         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6179                 le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6180                 sbi->s_add_error_count = 0;
6181         }
6182         spin_unlock(&sbi->s_error_lock);
6183
6184         ext4_superblock_csum_set(sb);
6185         unlock_buffer(sbh);
6186 }
6187
6188 static int ext4_commit_super(struct super_block *sb)
6189 {
6190         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6191
6192         if (!sbh)
6193                 return -EINVAL;
6194
6195         ext4_update_super(sb);
6196
6197         lock_buffer(sbh);
6198         /* Buffer got discarded which means block device got invalidated */
6199         if (!buffer_mapped(sbh)) {
6200                 unlock_buffer(sbh);
6201                 return -EIO;
6202         }
6203
6204         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6205                 /*
6206                  * Oh, dear.  A previous attempt to write the
6207                  * superblock failed.  This could happen because the
6208                  * USB device was yanked out.  Or it could happen to
6209                  * be a transient write error and maybe the block will
6210                  * be remapped.  Nothing we can do but to retry the
6211                  * write and hope for the best.
6212                  */
6213                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
6214                        "superblock detected");
6215                 clear_buffer_write_io_error(sbh);
6216                 set_buffer_uptodate(sbh);
6217         }
6218         get_bh(sbh);
6219         /* Clear potential dirty bit if it was journalled update */
6220         clear_buffer_dirty(sbh);
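        /*
         * When barriers are enabled, write the superblock with REQ_FUA so
         * it reaches stable media rather than just the device cache.
         */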
6221         sbh->b_end_io = end_buffer_write_sync;
6222         submit_bh(REQ_OP_WRITE | REQ_SYNC |
6223                   (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6224         wait_on_buffer(sbh);
6225         if (buffer_write_io_error(sbh)) {
6226                 ext4_msg(sb, KERN_ERR, "I/O error while writing "
6227                        "superblock");
6228                 clear_buffer_write_io_error(sbh);
6229                 set_buffer_uptodate(sbh);
6230                 return -EIO;
6231         }
6232         return 0;
6233 }
6234
6235 /*
6236  * Have we just finished recovery?  If so, and if we are mounting (or
6237  * remounting) the filesystem readonly, then we will end up with a
6238  * consistent fs on disk.  Record that fact.
6239  */
6240 static int ext4_mark_recovery_complete(struct super_block *sb,
6241                                        struct ext4_super_block *es)
6242 {
6243         int err;
6244         journal_t *journal = EXT4_SB(sb)->s_journal;
6245
6246         if (!ext4_has_feature_journal(sb)) {
6247                 if (journal != NULL) {
6248                         ext4_error(sb, "Journal got removed while the fs was "
6249                                    "mounted!");
6250                         return -EFSCORRUPTED;
6251                 }
6252                 return 0;
6253         }
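        /*
         * Block new transactions and flush the whole journal so that
         * everything it describes is on disk before the recovery flags
         * are cleared below.
         */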
6254         jbd2_journal_lock_updates(journal);
6255         err = jbd2_journal_flush(journal, 0);
6256         if (err < 0)
6257                 goto out;
6258
6259         if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6260             ext4_has_feature_orphan_present(sb))) {
6261                 if (!ext4_orphan_file_empty(sb)) {
6262                         ext4_error(sb, "Orphan file not empty on read-only fs.");
6263                         err = -EFSCORRUPTED;
6264                         goto out;
6265                 }
6266                 ext4_clear_feature_journal_needs_recovery(sb);
6267                 ext4_clear_feature_orphan_present(sb);
6268                 ext4_commit_super(sb);
6269         }
6270 out:
6271         jbd2_journal_unlock_updates(journal);
6272         return err;
6273 }
6274
6275 /*
6276  * If we are mounting (or read-write remounting) a filesystem whose journal
6277  * has recorded an error from a previous lifetime, move that error to the
6278  * main filesystem now.
6279  */
6280 static int ext4_clear_journal_err(struct super_block *sb,
6281                                    struct ext4_super_block *es)
6282 {
6283         journal_t *journal;
6284         int j_errno;
6285         const char *errstr;
6286
6287         if (!ext4_has_feature_journal(sb)) {
6288                 ext4_error(sb, "Journal got removed while the fs was mounted!");
6289                 return -EFSCORRUPTED;
6290         }
6291
6292         journal = EXT4_SB(sb)->s_journal;
6293
6294         /*
6295          * Now check for any error status which may have been recorded in the
6296          * journal by a prior ext4_error() or ext4_abort()
6297          */
6298
6299         j_errno = jbd2_journal_errno(journal);
6300         if (j_errno) {
6301                 char nbuf[16];
6302
6303                 errstr = ext4_decode_error(sb, j_errno, nbuf);
6304                 ext4_warning(sb, "Filesystem error recorded "
6305                              "from previous mount: %s", errstr);
6306
6307                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6308                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6309                 j_errno = ext4_commit_super(sb);
6310                 if (j_errno)
6311                         return j_errno;
6312                 ext4_warning(sb, "Marked fs in need of filesystem check.");
6313
6314                 jbd2_journal_clear_err(journal);
6315                 jbd2_journal_update_sb_errno(journal);
6316         }
6317         return 0;
6318 }
6319
6320 /*
6321  * Force the running and committing transactions to commit,
6322  * and wait on the commit.
6323  */
6324 int ext4_force_commit(struct super_block *sb)
6325 {
6326         return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
6327 }
6328
6329 static int ext4_sync_fs(struct super_block *sb, int wait)
6330 {
6331         int ret = 0;
6332         tid_t target;
6333         bool needs_barrier = false;
6334         struct ext4_sb_info *sbi = EXT4_SB(sb);
6335
6336         ret = ext4_emergency_state(sb);
6337         if (unlikely(ret))
6338                 return ret;
6339
6340         trace_ext4_sync_fs(sb, wait);
6341         flush_workqueue(sbi->rsv_conversion_wq);
6342         /*
6343          * Write back quota in the non-journalled quota case; journalled
6344          * quota has no dirty dquots.
6345          */
6346         dquot_writeback_dquots(sb, -1);
6347         /*
6348          * Data writeback is possible w/o a journal transaction, so a barrier
6349          * must be sent at the end of the function. But we can skip it if the
6350          * transaction commit will do it for us.
6351          */
6352         if (sbi->s_journal) {
6353                 target = jbd2_get_latest_transaction(sbi->s_journal);
6354                 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6355                     !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6356                         needs_barrier = true;
6357
6358                 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6359                         if (wait)
6360                                 ret = jbd2_log_wait_commit(sbi->s_journal,
6361                                                            target);
6362                 }
6363         } else if (wait && test_opt(sb, BARRIER))
6364                 needs_barrier = true;
6365         if (needs_barrier) {
6366                 int err;
6367                 err = blkdev_issue_flush(sb->s_bdev);
6368                 if (!ret)
6369                         ret = err;
6370         }
6371
6372         return ret;
6373 }
6374
6375 /*
6376  * LVM calls this function before a (read-only) snapshot is created.  This
6377  * gives us a chance to flush the journal completely and mark the fs clean.
6378  *
6379  * Note that this function alone cannot bring the filesystem into a clean
6380  * state; it relies on the upper layer to stop all data & metadata
6381  * modifications.
6382  */
6383 static int ext4_freeze(struct super_block *sb)
6384 {
6385         int error = 0;
6386         journal_t *journal = EXT4_SB(sb)->s_journal;
6387
6388         if (journal) {
6389                 /* Now we set up the journal barrier. */
6390                 jbd2_journal_lock_updates(journal);
6391
6392                 /*
6393                  * Don't clear the needs_recovery flag if we failed to
6394                  * flush the journal.
6395                  */
6396                 error = jbd2_journal_flush(journal, 0);
6397                 if (error < 0)
6398                         goto out;
6399
6400                 /* Journal blocked and flushed, clear needs_recovery flag. */
6401                 ext4_clear_feature_journal_needs_recovery(sb);
6402                 if (ext4_orphan_file_empty(sb))
6403                         ext4_clear_feature_orphan_present(sb);
6404         }
6405
6406         error = ext4_commit_super(sb);
6407 out:
6408         if (journal)
6409                 /* we rely on upper layer to stop further updates */
6410                 jbd2_journal_unlock_updates(journal);
6411         return error;
6412 }
6413
6414 /*
6415  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
6416  * flag here, even though the filesystem is not technically dirty yet.
6417  */
6418 static int ext4_unfreeze(struct super_block *sb)
6419 {
6420         if (ext4_emergency_state(sb))
6421                 return 0;
6422
6423         if (EXT4_SB(sb)->s_journal) {
6424                 /* Reset the needs_recovery flag before the fs is unlocked. */
6425                 ext4_set_feature_journal_needs_recovery(sb);
6426                 if (ext4_has_feature_orphan_file(sb))
6427                         ext4_set_feature_orphan_present(sb);
6428         }
6429
6430         ext4_commit_super(sb);
6431         return 0;
6432 }
6433
6434 /*
6435  * Structure to save mount options for ext4_remount's benefit
6436  */
6437 struct ext4_mount_options {
6438         unsigned long s_mount_opt;
6439         unsigned long s_mount_opt2;
6440         kuid_t s_resuid;
6441         kgid_t s_resgid;
6442         unsigned long s_commit_interval;
6443         u32 s_min_batch_time, s_max_batch_time;
6444 #ifdef CONFIG_QUOTA
6445         int s_jquota_fmt;
6446         char *s_qf_names[EXT4_MAXQUOTAS];
6447 #endif
6448 };
6449
6450 static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6451 {
6452         struct ext4_fs_context *ctx = fc->fs_private;
6453         struct ext4_super_block *es;
6454         struct ext4_sb_info *sbi = EXT4_SB(sb);
6455         unsigned long old_sb_flags;
6456         struct ext4_mount_options old_opts;
6457         ext4_group_t g;
6458         int err = 0;
6459         int alloc_ctx;
6460 #ifdef CONFIG_QUOTA
6461         int enable_quota = 0;
6462         int i, j;
6463         char *to_free[EXT4_MAXQUOTAS];
6464 #endif
6465
6467         /* Store the original options */
6468         old_sb_flags = sb->s_flags;
6469         old_opts.s_mount_opt = sbi->s_mount_opt;
6470         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6471         old_opts.s_resuid = sbi->s_resuid;
6472         old_opts.s_resgid = sbi->s_resgid;
6473         old_opts.s_commit_interval = sbi->s_commit_interval;
6474         old_opts.s_min_batch_time = sbi->s_min_batch_time;
6475         old_opts.s_max_batch_time = sbi->s_max_batch_time;
6476 #ifdef CONFIG_QUOTA
6477         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6478         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6479                 if (sbi->s_qf_names[i]) {
6480                         char *qf_name = get_qf_name(sb, sbi, i);
6481
6482                         old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6483                         if (!old_opts.s_qf_names[i]) {
6484                                 for (j = 0; j < i; j++)
6485                                         kfree(old_opts.s_qf_names[j]);
6486                                 return -ENOMEM;
6487                         }
6488                 } else
6489                         old_opts.s_qf_names[i] = NULL;
6490 #endif
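        /*
         * If no journal I/O priority was specified for this remount, keep
         * the priority the journal thread is currently running with, or
         * fall back to the default.
         */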
6491         if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6492                 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6493                         ctx->journal_ioprio =
6494                                 sbi->s_journal->j_task->io_context->ioprio;
6495                 else
6496                         ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
6497
6498         }
6499
6500         if ((ctx->spec & EXT4_SPEC_s_stripe) &&
6501             ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
6502                 ext4_msg(sb, KERN_WARNING,
6503                          "stripe (%lu) is not aligned with cluster size (%u), "
6504                          "stripe is disabled",
6505                          ctx->s_stripe, sbi->s_cluster_ratio);
6506                 ctx->s_stripe = 0;
6507         }
6508
6509         /*
6510          * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
6511          * two calls to ext4_should_dioread_nolock() to return inconsistent
6512          * values, triggering a WARN_ON in ext4_add_complete_io(). We grab
6513          * s_writepages_rwsem here to avoid races between writepages
6514          * operations and remount.
6515          */
6516         alloc_ctx = ext4_writepages_down_write(sb);
6517         ext4_apply_options(fc, sb);
6518         ext4_writepages_up_write(sb, alloc_ctx);
6519
6520         if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6521             test_opt(sb, JOURNAL_CHECKSUM)) {
6522                 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6523                          "during remount not supported; ignoring");
6524                 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6525         }
6526
6527         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6528                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6529                         ext4_msg(sb, KERN_ERR, "can't mount with "
6530                                  "both data=journal and delalloc");
6531                         err = -EINVAL;
6532                         goto restore_opts;
6533                 }
6534                 if (test_opt(sb, DIOREAD_NOLOCK)) {
6535                         ext4_msg(sb, KERN_ERR, "can't mount with "
6536                                  "both data=journal and dioread_nolock");
6537                         err = -EINVAL;
6538                         goto restore_opts;
6539                 }
6540         } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6541                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6542                         ext4_msg(sb, KERN_ERR, "can't mount with "
6543                                 "journal_async_commit in data=ordered mode");
6544                         err = -EINVAL;
6545                         goto restore_opts;
6546                 }
6547         }
6548
6549         if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6550                 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6551                 err = -EINVAL;
6552                 goto restore_opts;
6553         }
6554
6555         if ((old_opts.s_mount_opt & EXT4_MOUNT_DELALLOC) &&
6556             !test_opt(sb, DELALLOC)) {
6557                 ext4_msg(sb, KERN_ERR, "can't disable delalloc during remount");
6558                 err = -EINVAL;
6559                 goto restore_opts;
6560         }
6561
6562         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6563                 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6564
6565         es = sbi->s_es;
6566
6567         if (sbi->s_journal) {
6568                 ext4_init_journal_params(sb, sbi->s_journal);
6569                 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6570         }
6571
6572         /* Flush outstanding errors before changing fs state */
6573         flush_work(&sbi->s_sb_upd_work);
6574
6575         if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6576                 if (ext4_emergency_state(sb)) {
6577                         err = -EROFS;
6578                         goto restore_opts;
6579                 }
6580
6581                 if (fc->sb_flags & SB_RDONLY) {
6582                         err = sync_filesystem(sb);
6583                         if (err < 0)
6584                                 goto restore_opts;
6585                         err = dquot_suspend(sb, -1);
6586                         if (err < 0)
6587                                 goto restore_opts;
6588
6589                         /*
6590                          * First of all, the unconditional stuff we have to do
6591                          * to disable replay of the journal when we next remount
6592                          */
6593                         sb->s_flags |= SB_RDONLY;
6594
6595                         /*
6596                          * OK, test if we are remounting a valid rw partition
6597                          * readonly, and if so set the rdonly flag and then
6598                          * mark the partition as valid again.
6599                          */
6600                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6601                             (sbi->s_mount_state & EXT4_VALID_FS))
6602                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
6603
6604                         if (sbi->s_journal) {
6605                                 /*
6606                                  * We let remount-ro finish even if marking fs
6607                                  * as clean failed...
6608                                  */
6609                                 ext4_mark_recovery_complete(sb, es);
6610                         }
6611                 } else {
6612                         /* Make sure we can mount this feature set readwrite */
6613                         if (ext4_has_feature_readonly(sb) ||
6614                             !ext4_feature_set_ok(sb, 0)) {
6615                                 err = -EROFS;
6616                                 goto restore_opts;
6617                         }
6618                         /*
6619                          * Make sure the group descriptor checksums
6620                          * are sane.  If they aren't, refuse to remount r/w.
6621                          */
6622                         for (g = 0; g < sbi->s_groups_count; g++) {
6623                                 struct ext4_group_desc *gdp =
6624                                         ext4_get_group_desc(sb, g, NULL);
6625
6626                                 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6627                                         ext4_msg(sb, KERN_ERR,
6628                "ext4_remount: Checksum for group %u failed (%u!=%u)",
6629                 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6630                                                le16_to_cpu(gdp->bg_checksum));
6631                                         err = -EFSBADCRC;
6632                                         goto restore_opts;
6633                                 }
6634                         }
6635
6636                         /*
6637                          * If we have an unprocessed orphan list hanging
6638                          * around from a previously readonly bdev mount,
6639                          * require a full umount/remount for now.
6640                          */
6641                         if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6642                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
6643                                        "remount RDWR because of unprocessed "
6644                                        "orphan inode list.  Please "
6645                                        "umount/remount instead");
6646                                 err = -EINVAL;
6647                                 goto restore_opts;
6648                         }
6649
6650                         /*
6651                          * Mounting an RDONLY partition read-write, so reread
6652                          * and store the current valid flag.  (It may have
6653                          * been changed by e2fsck since we originally mounted
6654                          * the partition.)
6655                          */
6656                         if (sbi->s_journal) {
6657                                 err = ext4_clear_journal_err(sb, es);
6658                                 if (err)
6659                                         goto restore_opts;
6660                         }
6661                         sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6662                                               ~EXT4_FC_REPLAY);
6663
6664                         err = ext4_setup_super(sb, es, 0);
6665                         if (err)
6666                                 goto restore_opts;
6667
6668                         sb->s_flags &= ~SB_RDONLY;
6669                         if (ext4_has_feature_mmp(sb)) {
6670                                 err = ext4_multi_mount_protect(sb,
6671                                                 le64_to_cpu(es->s_mmp_block));
6672                                 if (err)
6673                                         goto restore_opts;
6674                         }
6675 #ifdef CONFIG_QUOTA
6676                         enable_quota = 1;
6677 #endif
6678                 }
6679         }
6680
6681         /*
6682          * Handle creation of system zone data early because it can fail.
6683          * Existing data is released only once we are sure the remount will
6684          * succeed.
6685          */
6686         if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6687                 err = ext4_setup_system_zone(sb);
6688                 if (err)
6689                         goto restore_opts;
6690         }
6691
6692         if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6693                 err = ext4_commit_super(sb);
6694                 if (err)
6695                         goto restore_opts;
6696         }
6697
6698 #ifdef CONFIG_QUOTA
6699         if (enable_quota) {
6700                 if (sb_any_quota_suspended(sb))
6701                         dquot_resume(sb, -1);
6702                 else if (ext4_has_feature_quota(sb)) {
6703                         err = ext4_enable_quotas(sb);
6704                         if (err)
6705                                 goto restore_opts;
6706                 }
6707         }
6708         /* Release old quota file names */
6709         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6710                 kfree(old_opts.s_qf_names[i]);
6711 #endif
6712         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6713                 ext4_release_system_zone(sb);
6714
6715         /*
6716          * Reinitialize lazy itable initialization thread based on
6717          * current settings
6718          */
6719         if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6720                 ext4_unregister_li_request(sb);
6721         else {
6722                 ext4_group_t first_not_zeroed;
6723                 first_not_zeroed = ext4_has_uninit_itable(sb);
6724                 ext4_register_li_request(sb, first_not_zeroed);
6725         }
6726
6727         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6728                 ext4_stop_mmpd(sbi);
6729
6730         /*
6731          * Handle aborting the filesystem as the last thing during remount to
6732          * avoid obscure errors when some option changes fail to apply because
6733          * the filesystem has been shut down.
6734          */
6735         if (test_opt2(sb, ABORT))
6736                 ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6737
6738         return 0;
6739
6740 restore_opts:
6741         /*
6742          * If there was a failed r/w to ro transition, we may need to
6743          * re-enable quota.
6744          */
6745         if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
6746             sb_any_quota_suspended(sb))
6747                 dquot_resume(sb, -1);
6748
6749         alloc_ctx = ext4_writepages_down_write(sb);
6750         sb->s_flags = old_sb_flags;
6751         sbi->s_mount_opt = old_opts.s_mount_opt;
6752         sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6753         sbi->s_resuid = old_opts.s_resuid;
6754         sbi->s_resgid = old_opts.s_resgid;
6755         sbi->s_commit_interval = old_opts.s_commit_interval;
6756         sbi->s_min_batch_time = old_opts.s_min_batch_time;
6757         sbi->s_max_batch_time = old_opts.s_max_batch_time;
6758         ext4_writepages_up_write(sb, alloc_ctx);
6759
6760         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6761                 ext4_release_system_zone(sb);
6762 #ifdef CONFIG_QUOTA
6763         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
6764         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6765                 to_free[i] = get_qf_name(sb, sbi, i);
6766                 rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6767         }
6768         synchronize_rcu();
6769         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6770                 kfree(to_free[i]);
6771 #endif
6772         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6773                 ext4_stop_mmpd(sbi);
6774         return err;
6775 }
6776
6777 static int ext4_reconfigure(struct fs_context *fc)
6778 {
6779         struct super_block *sb = fc->root->d_sb;
6780         int ret;
6781         bool old_ro = sb_rdonly(sb);
6782
6783         fc->s_fs_info = EXT4_SB(sb);
6784
6785         ret = ext4_check_opt_consistency(fc, sb);
6786         if (ret < 0)
6787                 return ret;
6788
6789         ret = __ext4_remount(fc, sb);
6790         if (ret < 0)
6791                 return ret;
6792
6793         ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
6794                  &sb->s_uuid,
6795                  (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
6796
6797         return 0;
6798 }
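/*
 * Illustrative only: ext4_reconfigure() is reached when userspace remounts
 * the filesystem, e.g. "mount -o remount,ro /mnt" or mount(2) with
 * MS_REMOUNT.  A minimal userspace sketch, assuming an ext4 filesystem
 * already mounted at the hypothetical path /mnt:
 */
#if 0
#include <sys/mount.h>
#include <stdio.h>

int main(void)
{
        /* The VFS routes MS_REMOUNT into the fs_context reconfigure hook */
        if (mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL) != 0) {
                perror("remount ro");
                return 1;
        }
        return 0;
}
#endif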
6799
6800 #ifdef CONFIG_QUOTA
6801 static int ext4_statfs_project(struct super_block *sb,
6802                                kprojid_t projid, struct kstatfs *buf)
6803 {
6804         struct kqid qid;
6805         struct dquot *dquot;
6806         u64 limit;
6807         u64 curblock;
6808
6809         qid = make_kqid_projid(projid);
6810         dquot = dqget(sb, qid);
6811         if (IS_ERR(dquot))
6812                 return PTR_ERR(dquot);
6813         spin_lock(&dquot->dq_dqb_lock);
6814
6815         limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6816                              dquot->dq_dqb.dqb_bhardlimit);
6817         limit >>= sb->s_blocksize_bits;
6818
6819         if (limit) {
6820                 uint64_t        remaining = 0;
6821
6822                 curblock = (dquot->dq_dqb.dqb_curspace +
6823                             dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
6824                 if (limit > curblock)
6825                         remaining = limit - curblock;
6826
6827                 buf->f_blocks = min(buf->f_blocks, limit);
6828                 buf->f_bfree = min(buf->f_bfree, remaining);
6829                 buf->f_bavail = min(buf->f_bavail, remaining);
6830         }
6831
6832         limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6833                              dquot->dq_dqb.dqb_ihardlimit);
6834         if (limit) {
6835                 uint64_t        remaining = 0;
6836
6837                 if (limit > dquot->dq_dqb.dqb_curinodes)
6838                         remaining = limit - dquot->dq_dqb.dqb_curinodes;
6839
6840                 buf->f_files = min(buf->f_files, limit);
6841                 buf->f_ffree = min(buf->f_ffree, remaining);
6842         }
6843
6844         spin_unlock(&dquot->dq_dqb_lock);
6845         dqput(dquot);
6846         return 0;
6847 }
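/*
 * Worked example of the clamping above (illustrative numbers): with a
 * 4 KiB block size (s_blocksize_bits == 12), a project block hard limit
 * of 1 GiB and 256 MiB of space already charged:
 *
 *      limit     = (1ULL << 30) >> 12;          262144 blocks
 *      curblock  = (256ULL << 20) >> 12;         65536 blocks
 *      remaining = limit - curblock;            196608 blocks
 *
 * so statfs() on an inode under this project reports at most 262144
 * total blocks and 196608 free blocks, whatever the filesystem-wide
 * counters say.
 */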
6848 #endif
6849
6850 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6851 {
6852         struct super_block *sb = dentry->d_sb;
6853         struct ext4_sb_info *sbi = EXT4_SB(sb);
6854         struct ext4_super_block *es = sbi->s_es;
6855         ext4_fsblk_t overhead = 0, resv_blocks;
6856         s64 bfree;
6857         resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6858
6859         if (!test_opt(sb, MINIX_DF))
6860                 overhead = sbi->s_overhead;
6861
6862         buf->f_type = EXT4_SUPER_MAGIC;
6863         buf->f_bsize = sb->s_blocksize;
6864         buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6865         bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6866                 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6867         /* prevent underflow in case little free space is available */
6868         buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6869         buf->f_bavail = buf->f_bfree -
6870                         (ext4_r_blocks_count(es) + resv_blocks);
6871         if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6872                 buf->f_bavail = 0;
6873         buf->f_files = le32_to_cpu(es->s_inodes_count);
6874         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6875         buf->f_namelen = EXT4_NAME_LEN;
6876         buf->f_fsid = uuid_to_fsid(es->s_uuid);
6877
6878 #ifdef CONFIG_QUOTA
6879         if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6880             sb_has_quota_limits_enabled(sb, PRJQUOTA))
6881                 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6882 #endif
6883         return 0;
6884 }
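/*
 * Illustrative only: the values filled in above are what userspace sees
 * via statfs(2)/statvfs(3); note that f_bavail excludes the root-reserved
 * blocks while f_bfree does not.  A minimal sketch, assuming an ext4
 * filesystem mounted at the hypothetical path /mnt:
 */
#if 0
#include <sys/vfs.h>
#include <stdio.h>

int main(void)
{
        struct statfs st;

        if (statfs("/mnt", &st) != 0) {
                perror("statfs");
                return 1;
        }
        printf("bsize=%ld blocks=%llu bfree=%llu bavail=%llu\n",
               (long)st.f_bsize,
               (unsigned long long)st.f_blocks,
               (unsigned long long)st.f_bfree,
               (unsigned long long)st.f_bavail);
        return 0;
}
#endif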
6885
6886
6887 #ifdef CONFIG_QUOTA
6888
6889 /*
6890  * Helper functions so that a transaction is started before we acquire
6891  * dqio_sem, keeping the correct lock ordering of transaction > dqio_sem.
6892  */
6893 static inline struct inode *dquot_to_inode(struct dquot *dquot)
6894 {
6895         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6896 }
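/*
 * Illustrative sketch of the lock ordering preserved by the helpers
 * below (dqio_sem is taken inside the generic dquot_*() operations):
 *
 *      handle = ext4_journal_start(...);       start transaction first
 *      dquot_commit(dquot);                    takes dqio_sem internally
 *      ext4_journal_stop(handle);
 *
 * Starting a handle can block waiting for journal space, so doing it
 * while already holding dqio_sem could deadlock against another task
 * that holds a handle and wants dqio_sem.
 */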
6897
6898 static int ext4_write_dquot(struct dquot *dquot)
6899 {
6900         int ret, err;
6901         handle_t *handle;
6902         struct inode *inode;
6903
6904         inode = dquot_to_inode(dquot);
6905         handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6906                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6907         if (IS_ERR(handle))
6908                 return PTR_ERR(handle);
6909         ret = dquot_commit(dquot);
6910         if (ret < 0)
6911                 ext4_error_err(dquot->dq_sb, -ret,
6912                                "Failed to commit dquot type %d",
6913                                dquot->dq_id.type);
6914         err = ext4_journal_stop(handle);
6915         if (!ret)
6916                 ret = err;
6917         return ret;
6918 }
6919
6920 static int ext4_acquire_dquot(struct dquot *dquot)
6921 {
6922         int ret, err;
6923         handle_t *handle;
6924
6925         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6926                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6927         if (IS_ERR(handle))
6928                 return PTR_ERR(handle);
6929         ret = dquot_acquire(dquot);
6930         if (ret < 0)
6931                 ext4_error_err(dquot->dq_sb, -ret,
6932                               "Failed to acquire dquot type %d",
6933                               dquot->dq_id.type);
6934         err = ext4_journal_stop(handle);
6935         if (!ret)
6936                 ret = err;
6937         return ret;
6938 }
6939
6940 static int ext4_release_dquot(struct dquot *dquot)
6941 {
6942         int ret, err;
6943         handle_t *handle;
6944         bool freeze_protected = false;
6945
6946         /*
6947          * Trying to sb_start_intwrite() in a running transaction
6948          * can result in a deadlock. Further, running transactions
6949          * are already protected from freezing.
6950          */
6951         if (!ext4_journal_current_handle()) {
6952                 sb_start_intwrite(dquot->dq_sb);
6953                 freeze_protected = true;
6954         }
6955
6956         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6957                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6958         if (IS_ERR(handle)) {
6959                 /* Release dquot anyway to avoid endless cycle in dqput() */
6960                 dquot_release(dquot);
6961                 if (freeze_protected)
6962                         sb_end_intwrite(dquot->dq_sb);
6963                 return PTR_ERR(handle);
6964         }
6965         ret = dquot_release(dquot);
6966         if (ret < 0)
6967                 ext4_error_err(dquot->dq_sb, -ret,
6968                                "Failed to release dquot type %d",
6969                                dquot->dq_id.type);
6970         err = ext4_journal_stop(handle);
6971         if (!ret)
6972                 ret = err;
6973
6974         if (freeze_protected)
6975                 sb_end_intwrite(dquot->dq_sb);
6976
6977         return ret;
6978 }
6979
6980 static int ext4_mark_dquot_dirty(struct dquot *dquot)
6981 {
6982         struct super_block *sb = dquot->dq_sb;
6983
6984         if (ext4_is_quota_journalled(sb)) {
6985                 dquot_mark_dquot_dirty(dquot);
6986                 return ext4_write_dquot(dquot);
6987         } else {
6988                 return dquot_mark_dquot_dirty(dquot);
6989         }
6990 }
6991
6992 static int ext4_write_info(struct super_block *sb, int type)
6993 {
6994         int ret, err;
6995         handle_t *handle;
6996
6997         /* Data block + inode block */
6998         handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
6999         if (IS_ERR(handle))
7000                 return PTR_ERR(handle);
7001         ret = dquot_commit_info(sb, type);
7002         err = ext4_journal_stop(handle);
7003         if (!ret)
7004                 ret = err;
7005         return ret;
7006 }
7007
7008 static void lockdep_set_quota_inode(struct inode *inode, int subclass)
7009 {
7010         struct ext4_inode_info *ei = EXT4_I(inode);
7011
7012         /* The first argument of lockdep_set_subclass has to be
7013          * *exactly* the same as the argument to init_rwsem() --- in
7014          * this case, in init_once() --- or lockdep gets unhappy
7015          * because the name of the lock is set using the
7016          * stringification of the argument to init_rwsem().
7017          */
7018         (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
7019         lockdep_set_subclass(&ei->i_data_sem, subclass);
7020 }
7021
7022 /*
7023  * Standard function to be called on quota_on
7024  */
7025 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
7026                          const struct path *path)
7027 {
7028         int err;
7029
7030         if (!test_opt(sb, QUOTA))
7031                 return -EINVAL;
7032
7033         /* Quotafile not on the same filesystem? */
7034         if (path->dentry->d_sb != sb)
7035                 return -EXDEV;
7036
7037         /* Quota already enabled for this file? */
7038         if (IS_NOQUOTA(d_inode(path->dentry)))
7039                 return -EBUSY;
7040
7041         /* Journaling quota? */
7042         if (EXT4_SB(sb)->s_qf_names[type]) {
7043                 /* Quotafile not in fs root? */
7044                 if (path->dentry->d_parent != sb->s_root)
7045                         ext4_msg(sb, KERN_WARNING,
7046                                 "Quota file not on filesystem root. "
7047                                 "Journaled quota will not work");
7048                 sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
7049         } else {
7050                 /*
7051                  * Clear the flag just in case mount options changed since
7052                  * last time.
7053                  */
7054                 sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
7055         }
7056
7057         lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
7058         err = dquot_quota_on(sb, type, format_id, path);
7059         if (!err) {
7060                 struct inode *inode = d_inode(path->dentry);
7061                 handle_t *handle;
7062
7063                 /*
7064                  * Set inode flags to prevent userspace from messing with quota
7065                  * files. If this fails, we return success anyway since quotas
7066                  * are already enabled and this is not a hard failure.
7067                  */
7068                 inode_lock(inode);
7069                 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
7070                 if (IS_ERR(handle))
7071                         goto unlock_inode;
7072                 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
7073                 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
7074                                 S_NOATIME | S_IMMUTABLE);
7075                 err = ext4_mark_inode_dirty(handle, inode);
7076                 ext4_journal_stop(handle);
7077         unlock_inode:
7078                 inode_unlock(inode);
7079                 if (err)
7080                         dquot_quota_off(sb, type);
7081         }
7082         if (err)
7083                 lockdep_set_quota_inode(path->dentry->d_inode,
7084                                              I_DATA_SEM_NORMAL);
7085         return err;
7086 }
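/*
 * Illustrative only: this path is reached when userspace turns quotas on
 * for a regular quota file, e.g. via quotactl(2).  A minimal sketch,
 * assuming user quota in the VFS v1 format with the hypothetical quota
 * file /mnt/aquota.user on the device /dev/sdb1:
 */
#if 0
#include <sys/quota.h>
#include <linux/quota.h>        /* QFMT_VFS_V1 */
#include <stdio.h>

int main(void)
{
        if (quotactl(QCMD(Q_QUOTAON, USRQUOTA), "/dev/sdb1",
                     QFMT_VFS_V1, (void *)"/mnt/aquota.user") != 0) {
                perror("quotactl");
                return 1;
        }
        return 0;
}
#endif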
7087
7088 static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
7089 {
7090         switch (type) {
7091         case USRQUOTA:
7092                 return qf_inum == EXT4_USR_QUOTA_INO;
7093         case GRPQUOTA:
7094                 return qf_inum == EXT4_GRP_QUOTA_INO;
7095         case PRJQUOTA:
7096                 return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
7097         default:
7098                 BUG();
7099         }
7100 }
7101
7102 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
7103                              unsigned int flags)
7104 {
7105         int err;
7106         struct inode *qf_inode;
7107         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7108                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7109                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7110                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7111         };
7112
7113         BUG_ON(!ext4_has_feature_quota(sb));
7114
7115         if (!qf_inums[type])
7116                 return -EPERM;
7117
7118         if (!ext4_check_quota_inum(type, qf_inums[type])) {
7119                 ext4_error(sb, "Bad quota inum: %lu, type: %d",
7120                                 qf_inums[type], type);
7121                 return -EUCLEAN;
7122         }
7123
7124         qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7125         if (IS_ERR(qf_inode)) {
7126                 ext4_error(sb, "Bad quota inode: %lu, type: %d",
7127                                 qf_inums[type], type);
7128                 return PTR_ERR(qf_inode);
7129         }
7130
7131         /* Don't account quota for quota files to avoid recursion */
7132         qf_inode->i_flags |= S_NOQUOTA;
7133         lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7134         err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
7135         if (err)
7136                 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
7137         iput(qf_inode);
7138
7139         return err;
7140 }
7141
7142 /* Enable usage tracking for all quota types. */
7143 int ext4_enable_quotas(struct super_block *sb)
7144 {
7145         int type, err = 0;
7146         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7147                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7148                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7149                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7150         };
7151         bool quota_mopt[EXT4_MAXQUOTAS] = {
7152                 test_opt(sb, USRQUOTA),
7153                 test_opt(sb, GRPQUOTA),
7154                 test_opt(sb, PRJQUOTA),
7155         };
7156
7157         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
7158         for (type = 0; type < EXT4_MAXQUOTAS; type++) {
7159                 if (qf_inums[type]) {
7160                         err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
7161                                 DQUOT_USAGE_ENABLED |
7162                                 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
7163                         if (err) {
7164                                 ext4_warning(sb,
7165                                         "Failed to enable quota tracking "
7166                                         "(type=%d, err=%d, ino=%lu). "
7167                                         "Please run e2fsck to fix.", type,
7168                                         err, qf_inums[type]);
7169
7170                                 ext4_quotas_off(sb, type);
7171                                 return err;
7172                         }
7173                 }
7174         }
7175         return 0;
7176 }
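/*
 * Note (illustrative): the hidden quota inodes consumed above are created
 * by e2fsprogs when the "quota" feature is set, e.g. "tune2fs -O quota
 * /dev/sdb1"; their inode numbers are then recorded in s_usr_quota_inum,
 * s_grp_quota_inum and s_prj_quota_inum in the superblock.
 */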
7177
7178 static int ext4_quota_off(struct super_block *sb, int type)
7179 {
7180         struct inode *inode = sb_dqopt(sb)->files[type];
7181         handle_t *handle;
7182         int err;
7183
7184         /* Force all delayed allocation blocks to be allocated.
7185          * Caller already holds s_umount sem */
7186         if (test_opt(sb, DELALLOC))
7187                 sync_filesystem(sb);
7188
7189         if (!inode || !igrab(inode))
7190                 goto out;
7191
7192         err = dquot_quota_off(sb, type);
7193         if (err || ext4_has_feature_quota(sb))
7194                 goto out_put;
7195         /*
7196          * When the filesystem was remounted read-only first, we cannot clean
7197          * up the inode flags here. Bad luck, but people should be using the
7198          * QUOTA feature these days anyway.
7199          */
7200         if (sb_rdonly(sb))
7201                 goto out_put;
7202
7203         inode_lock(inode);
7204         /*
7205          * Update modification times of quota files when userspace can
7206          * start looking at them. If we fail, we return success anyway since
7207          * this is not a hard failure and quotas are already disabled.
7208          */
7209         handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
7210         if (IS_ERR(handle)) {
7211                 err = PTR_ERR(handle);
7212                 goto out_unlock;
7213         }
7214         EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
7215         inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
7216         inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
7217         err = ext4_mark_inode_dirty(handle, inode);
7218         ext4_journal_stop(handle);
7219 out_unlock:
7220         inode_unlock(inode);
7221 out_put:
7222         lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
7223         iput(inode);
7224         return err;
7225 out:
7226         return dquot_quota_off(sb, type);
7227 }
7228
7229 /* Read data from the quota file - avoid the pagecache and such because we
7230  * cannot afford acquiring the locks... As quota files are never truncated
7231  * and the quota code itself serializes the operations (and no one else
7232  * should touch the files), we don't have to be afraid of races. */
7233 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
7234                                size_t len, loff_t off)
7235 {
7236         struct inode *inode = sb_dqopt(sb)->files[type];
7237         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7238         int offset = off & (sb->s_blocksize - 1);
7239         int tocopy;
7240         size_t toread;
7241         struct buffer_head *bh;
7242         loff_t i_size = i_size_read(inode);
7243
7244         if (off > i_size)
7245                 return 0;
7246         if (off+len > i_size)
7247                 len = i_size-off;
7248         toread = len;
7249         while (toread > 0) {
7250                 tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
7251                 bh = ext4_bread(NULL, inode, blk, 0);
7252                 if (IS_ERR(bh))
7253                         return PTR_ERR(bh);
7254                 if (!bh)        /* A hole? */
7255                         memset(data, 0, tocopy);
7256                 else
7257                         memcpy(data, bh->b_data+offset, tocopy);
7258                 brelse(bh);
7259                 offset = 0;
7260                 toread -= tocopy;
7261                 data += tocopy;
7262                 blk++;
7263         }
7264         return len;
7265 }
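/*
 * Worked example of the decomposition above (illustrative numbers): with
 * a 4 KiB block size, a read of len=100 at off=5000 gives
 * blk = 5000 >> 12 = 1 and offset = 5000 & 4095 = 904, so the whole
 * request is served from block 1 in one pass
 * (tocopy = min(4096 - 904, 100) = 100).
 */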
7266
7267 /* Write to the quota file (we know the transaction is already started and
7268  * has enough credits) */
7269 static ssize_t ext4_quota_write(struct super_block *sb, int type,
7270                                 const char *data, size_t len, loff_t off)
7271 {
7272         struct inode *inode = sb_dqopt(sb)->files[type];
7273         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7274         int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
7275         int retries = 0;
7276         struct buffer_head *bh;
7277         handle_t *handle = journal_current_handle();
7278
7279         if (!handle) {
7280                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7281                         " cancelled because transaction is not started",
7282                         (unsigned long long)off, (unsigned long long)len);
7283                 return -EIO;
7284         }
7285         /*
7286          * Since we account for only one data block in the transaction
7287          * credits, it is impossible to cross a block boundary.
7288          */
7289         if (sb->s_blocksize - offset < len) {
7290                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7291                         " cancelled because not block aligned",
7292                         (unsigned long long)off, (unsigned long long)len);
7293                 return -EIO;
7294         }
7295
7296         do {
7297                 bh = ext4_bread(handle, inode, blk,
7298                                 EXT4_GET_BLOCKS_CREATE |
7299                                 EXT4_GET_BLOCKS_METADATA_NOFAIL);
7300         } while (PTR_ERR(bh) == -ENOSPC &&
7301                  ext4_should_retry_alloc(inode->i_sb, &retries));
7302         if (IS_ERR(bh))
7303                 return PTR_ERR(bh);
7304         if (!bh)
7305                 goto out;
7306         BUFFER_TRACE(bh, "get write access");
7307         err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
7308         if (err) {
7309                 brelse(bh);
7310                 return err;
7311         }
7312         lock_buffer(bh);
7313         memcpy(bh->b_data+offset, data, len);
7314         flush_dcache_folio(bh->b_folio);
7315         unlock_buffer(bh);
7316         err = ext4_handle_dirty_metadata(handle, NULL, bh);
7317         brelse(bh);
7318 out:
7319         if (inode->i_size < off + len) {
7320                 i_size_write(inode, off + len);
7321                 EXT4_I(inode)->i_disksize = inode->i_size;
7322                 err2 = ext4_mark_inode_dirty(handle, inode);
7323                 if (unlikely(err2 && !err))
7324                         err = err2;
7325         }
7326         return err ? err : len;
7327 }
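/*
 * Worked example of the boundary check above (illustrative numbers):
 * with a 4 KiB block size, a write of len=20 at off=4090 gives
 * offset = 4090 and 4096 - 4090 = 6 < 20, so the request would span two
 * blocks and is rejected with -EIO.  The VFS quota formats keep each
 * dquot record within a single block, so this does not occur in
 * practice.
 */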
7328 #endif
7329
7330 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
7331 static inline void register_as_ext2(void)
7332 {
7333         int err = register_filesystem(&ext2_fs_type);
7334         if (err)
7335                 printk(KERN_WARNING
7336                        "EXT4-fs: Unable to register as ext2 (%d)\n", err);
7337 }
7338
7339 static inline void unregister_as_ext2(void)
7340 {
7341         unregister_filesystem(&ext2_fs_type);
7342 }
7343
7344 static inline int ext2_feature_set_ok(struct super_block *sb)
7345 {
7346         if (ext4_has_unknown_ext2_incompat_features(sb))
7347                 return 0;
7348         if (sb_rdonly(sb))
7349                 return 1;
7350         if (ext4_has_unknown_ext2_ro_compat_features(sb))
7351                 return 0;
7352         return 1;
7353 }
7354 #else
7355 static inline void register_as_ext2(void) { }
7356 static inline void unregister_as_ext2(void) { }
7357 static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
7358 #endif
7359
7360 static inline void register_as_ext3(void)
7361 {
7362         int err = register_filesystem(&ext3_fs_type);
7363         if (err)
7364                 printk(KERN_WARNING
7365                        "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7366 }
7367
7368 static inline void unregister_as_ext3(void)
7369 {
7370         unregister_filesystem(&ext3_fs_type);
7371 }
7372
7373 static inline int ext3_feature_set_ok(struct super_block *sb)
7374 {
7375         if (ext4_has_unknown_ext3_incompat_features(sb))
7376                 return 0;
7377         if (!ext4_has_feature_journal(sb))
7378                 return 0;
7379         if (sb_rdonly(sb))
7380                 return 1;
7381         if (ext4_has_unknown_ext3_ro_compat_features(sb))
7382                 return 0;
7383         return 1;
7384 }
7385
7386 static void ext4_kill_sb(struct super_block *sb)
7387 {
7388         struct ext4_sb_info *sbi = EXT4_SB(sb);
7389         struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;
7390
7391         kill_block_super(sb);
7392
7393         if (bdev_file)
7394                 bdev_fput(bdev_file);
7395 }
7396
7397 static struct file_system_type ext4_fs_type = {
7398         .owner                  = THIS_MODULE,
7399         .name                   = "ext4",
7400         .init_fs_context        = ext4_init_fs_context,
7401         .parameters             = ext4_param_specs,
7402         .kill_sb                = ext4_kill_sb,
7403         .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
7404 };
7405 MODULE_ALIAS_FS("ext4");
7406
7407 static int __init ext4_init_fs(void)
7408 {
7409         int err;
7410
7411         ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
7412         ext4_li_info = NULL;
7413
7414         /* Build-time check for flags consistency */
7415         ext4_check_flag_values();
7416
7417         err = ext4_init_es();
7418         if (err)
7419                 return err;
7420
7421         err = ext4_init_pending();
7422         if (err)
7423                 goto out7;
7424
7425         err = ext4_init_post_read_processing();
7426         if (err)
7427                 goto out6;
7428
7429         err = ext4_init_pageio();
7430         if (err)
7431                 goto out5;
7432
7433         err = ext4_init_system_zone();
7434         if (err)
7435                 goto out4;
7436
7437         err = ext4_init_sysfs();
7438         if (err)
7439                 goto out3;
7440
7441         err = ext4_init_mballoc();
7442         if (err)
7443                 goto out2;
7444         err = init_inodecache();
7445         if (err)
7446                 goto out1;
7447
7448         err = ext4_fc_init_dentry_cache();
7449         if (err)
7450                 goto out05;
7451
7452         register_as_ext3();
7453         register_as_ext2();
7454         err = register_filesystem(&ext4_fs_type);
7455         if (err)
7456                 goto out;
7457
7458         return 0;
7459 out:
7460         unregister_as_ext2();
7461         unregister_as_ext3();
7462         ext4_fc_destroy_dentry_cache();
7463 out05:
7464         destroy_inodecache();
7465 out1:
7466         ext4_exit_mballoc();
7467 out2:
7468         ext4_exit_sysfs();
7469 out3:
7470         ext4_exit_system_zone();
7471 out4:
7472         ext4_exit_pageio();
7473 out5:
7474         ext4_exit_post_read_processing();
7475 out6:
7476         ext4_exit_pending();
7477 out7:
7478         ext4_exit_es();
7479
7480         return err;
7481 }
7482
7483 static void __exit ext4_exit_fs(void)
7484 {
7485         ext4_destroy_lazyinit_thread();
7486         unregister_as_ext2();
7487         unregister_as_ext3();
7488         unregister_filesystem(&ext4_fs_type);
7489         ext4_fc_destroy_dentry_cache();
7490         destroy_inodecache();
7491         ext4_exit_mballoc();
7492         ext4_exit_sysfs();
7493         ext4_exit_system_zone();
7494         ext4_exit_pageio();
7495         ext4_exit_post_read_processing();
7496         ext4_exit_es();
7497         ext4_exit_pending();
7498 }
7499
7500 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
7501 MODULE_DESCRIPTION("Fourth Extended Filesystem");
7502 MODULE_LICENSE("GPL");
7503 module_init(ext4_init_fs)
7504 module_exit(ext4_exit_fs)