ext4: get rid of super block and sbi from handle_mount_ops()
[linux-2.6-block.git] fs/ext4/super.c
f5166768 1// SPDX-License-Identifier: GPL-2.0
ac27a0ec 2/*
617ba13b 3 * linux/fs/ext4/super.c
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 *
10 * from
11 *
12 * linux/fs/minix/inode.c
13 *
14 * Copyright (C) 1991, 1992 Linus Torvalds
15 *
16 * Big-endian to little-endian byte-swapping/bitmaps by
17 * David S. Miller (davem@caip.rutgers.edu), 1995
18 */
19
20#include <linux/module.h>
21#include <linux/string.h>
22#include <linux/fs.h>
23#include <linux/time.h>
c5ca7c76 24#include <linux/vmalloc.h>
25#include <linux/slab.h>
26#include <linux/init.h>
27#include <linux/blkdev.h>
66114cad 28#include <linux/backing-dev.h>
ac27a0ec 29#include <linux/parser.h>
ac27a0ec 30#include <linux/buffer_head.h>
a5694255 31#include <linux/exportfs.h>
32#include <linux/vfs.h>
33#include <linux/random.h>
34#include <linux/mount.h>
35#include <linux/namei.h>
36#include <linux/quotaops.h>
37#include <linux/seq_file.h>
3197ebdb 38#include <linux/ctype.h>
1330593e 39#include <linux/log2.h>
717d50e4 40#include <linux/crc16.h>
ef510424 41#include <linux/dax.h>
7abc52c2 42#include <linux/cleancache.h>
7c0f6ba6 43#include <linux/uaccess.h>
ee73f9a5 44#include <linux/iversion.h>
c83ad55e 45#include <linux/unicode.h>
c6a564ff 46#include <linux/part_stat.h>
47#include <linux/kthread.h>
48#include <linux/freezer.h>
9a089b21 49#include <linux/fsnotify.h>
50#include <linux/fs_context.h>
51#include <linux/fs_parser.h>
bfff6873 52
3dcf5451 53#include "ext4.h"
4a092d73 54#include "ext4_extents.h" /* Needed for trace points definition */
3dcf5451 55#include "ext4_jbd2.h"
56#include "xattr.h"
57#include "acl.h"
3661d286 58#include "mballoc.h"
0c9ec4be 59#include "fsmap.h"
ac27a0ec 60
61#define CREATE_TRACE_POINTS
62#include <trace/events/ext4.h>
63
0b75a840 64static struct ext4_lazy_init *ext4_li_info;
59ebc7fd 65static DEFINE_MUTEX(ext4_li_mtx);
e294a537 66static struct ratelimit_state ext4_mount_msg_ratelimit;
9f6200bb 67
617ba13b 68static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
ac27a0ec 69 unsigned long journal_devnum);
2adf6da8 70static int ext4_show_options(struct seq_file *seq, struct dentry *root);
2d01ddc8 71static void ext4_update_super(struct super_block *sb);
4392fbc4 72static int ext4_commit_super(struct super_block *sb);
11215630 73static int ext4_mark_recovery_complete(struct super_block *sb,
2b2d6d01 74 struct ext4_super_block *es);
75static int ext4_clear_journal_err(struct super_block *sb,
76 struct ext4_super_block *es);
617ba13b 77static int ext4_sync_fs(struct super_block *sb, int wait);
78static int ext4_remount(struct super_block *sb, int *flags, char *data);
79static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
c4be0c1d 80static int ext4_unfreeze(struct super_block *sb);
c4be0c1d 81static int ext4_freeze(struct super_block *sb);
82static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
83 const char *dev_name, void *data);
84static inline int ext2_feature_set_ok(struct super_block *sb);
85static inline int ext3_feature_set_ok(struct super_block *sb);
86static void ext4_destroy_lazyinit_thread(void);
87static void ext4_unregister_li_request(struct super_block *sb);
8f1f7453 88static void ext4_clear_request_list(void);
89static struct inode *ext4_get_journal_inode(struct super_block *sb,
90 unsigned int journal_inum);
da812f61 91static int ext4_validate_options(struct fs_context *fc);
92static int ext4_check_opt_consistency(struct fs_context *fc,
93 struct super_block *sb);
6e47a3cc 94static int ext4_apply_options(struct fs_context *fc, struct super_block *sb);
ac27a0ec 95
96/*
97 * Lock ordering
98 *
e74031fd 99 * page fault path:
100 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
101 * -> page lock -> i_data_sem (rw)
102 *
103 * buffered write path:
c1e8d7c6 104 * sb_start_write -> i_mutex -> mmap_lock
105 * sb_start_write -> i_mutex -> transaction start -> page lock ->
106 * i_data_sem (rw)
107 *
108 * truncate:
109 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
110 * page lock
111 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
1d39834f 112 * i_data_sem (rw)
113 *
114 * direct IO:
c1e8d7c6 115 * sb_start_write -> i_mutex -> mmap_lock
1d39834f 116 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
117 *
118 * writepages:
119 * transaction start -> page lock(s) -> i_data_sem (rw)
120 */
121
c290ea01 122#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
123static struct file_system_type ext2_fs_type = {
124 .owner = THIS_MODULE,
125 .name = "ext2",
126 .mount = ext4_mount,
127 .kill_sb = kill_block_super,
128 .fs_flags = FS_REQUIRES_DEV,
129};
7f78e035 130MODULE_ALIAS_FS("ext2");
fa7614dd 131MODULE_ALIAS("ext2");
132#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
133#else
134#define IS_EXT2_SB(sb) (0)
135#endif
136
137
138static struct file_system_type ext3_fs_type = {
139 .owner = THIS_MODULE,
140 .name = "ext3",
152a0836 141 .mount = ext4_mount,
142 .kill_sb = kill_block_super,
143 .fs_flags = FS_REQUIRES_DEV,
144};
7f78e035 145MODULE_ALIAS_FS("ext3");
fa7614dd 146MODULE_ALIAS("ext3");
ba69f9ab 147#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
bd81d8ee 148
fa491b14 149
150static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags,
151 bh_end_io_t *end_io)
152{
153 /*
154 * buffer's verified bit is no longer valid after reading from
155 * disk again due to write out error, clear it to make sure we
156 * recheck the buffer contents.
157 */
158 clear_buffer_verified(bh);
159
160 bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
161 get_bh(bh);
162 submit_bh(REQ_OP_READ, op_flags, bh);
163}
164
165void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags,
166 bh_end_io_t *end_io)
167{
168 BUG_ON(!buffer_locked(bh));
169
170 if (ext4_buffer_uptodate(bh)) {
171 unlock_buffer(bh);
172 return;
173 }
174 __ext4_read_bh(bh, op_flags, end_io);
175}
176
177int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io)
178{
179 BUG_ON(!buffer_locked(bh));
180
181 if (ext4_buffer_uptodate(bh)) {
182 unlock_buffer(bh);
183 return 0;
184 }
185
186 __ext4_read_bh(bh, op_flags, end_io);
187
188 wait_on_buffer(bh);
189 if (buffer_uptodate(bh))
190 return 0;
191 return -EIO;
192}
193
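/*
 * Read helper for callers that may not hold the buffer lock: the read is only
 * submitted if the lock can be taken without blocking.  With wait == true the
 * caller sleeps until the buffer is up to date (or gets -EIO); with
 * wait == false the I/O is merely kicked off (or skipped) and 0 is returned.
 */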
194int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait)
195{
196 if (trylock_buffer(bh)) {
197 if (wait)
198 return ext4_read_bh(bh, op_flags, NULL);
199 ext4_read_bh_nowait(bh, op_flags, NULL);
200 return 0;
201 }
202 if (wait) {
203 wait_on_buffer(bh);
204 if (buffer_uptodate(bh))
205 return 0;
206 return -EIO;
207 }
208 return 0;
209}
210
fb265c9c 211/*
8394a6ab 212 * This works like __bread_gfp() except it uses ERR_PTR for error
213 * returns. Currently with sb_bread it's impossible to distinguish
214 * between ENOMEM and EIO situations (since both result in a NULL
215 * return.
216 */
8394a6ab 217static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
218 sector_t block, int op_flags,
219 gfp_t gfp)
fb265c9c 220{
2d069c08 221 struct buffer_head *bh;
222 int ret;
fb265c9c 223
8394a6ab 224 bh = sb_getblk_gfp(sb, block, gfp);
225 if (bh == NULL)
226 return ERR_PTR(-ENOMEM);
cf2834a5 227 if (ext4_buffer_uptodate(bh))
fb265c9c 228 return bh;
2d069c08 229
230 ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
231 if (ret) {
232 put_bh(bh);
233 return ERR_PTR(ret);
234 }
235 return bh;
236}
237
8394a6ab 238struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
239 int op_flags)
240{
241 return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
242}
243
244struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
245 sector_t block)
246{
247 return __ext4_sb_bread_gfp(sb, block, 0, 0);
248}
249
5df1d412 250void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
251{
252 struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
253
254 if (likely(bh)) {
255 ext4_read_bh_lock(bh, REQ_RAHEAD, false);
256 brelse(bh);
257 }
258}
259
260static int ext4_verify_csum_type(struct super_block *sb,
261 struct ext4_super_block *es)
262{
e2b911c5 263 if (!ext4_has_feature_metadata_csum(sb))
264 return 1;
265
266 return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
267}
268
269static __le32 ext4_superblock_csum(struct super_block *sb,
270 struct ext4_super_block *es)
271{
272 struct ext4_sb_info *sbi = EXT4_SB(sb);
273 int offset = offsetof(struct ext4_super_block, s_checksum);
274 __u32 csum;
275
276 csum = ext4_chksum(sbi, ~0, (char *)es, offset);
277
278 return cpu_to_le32(csum);
279}
280
281static int ext4_superblock_csum_verify(struct super_block *sb,
282 struct ext4_super_block *es)
a9c47317 283{
9aa5d32b 284 if (!ext4_has_metadata_csum(sb))
285 return 1;
286
287 return es->s_checksum == ext4_superblock_csum(sb, es);
288}
289
06db49e6 290void ext4_superblock_csum_set(struct super_block *sb)
a9c47317 291{
292 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
293
9aa5d32b 294 if (!ext4_has_metadata_csum(sb))
295 return;
296
297 es->s_checksum = ext4_superblock_csum(sb, es);
298}
299
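/*
 * Block group descriptor accessors below: each on-disk field is split into a
 * _lo half and, when the descriptor size is at least EXT4_MIN_DESC_SIZE_64BIT,
 * a _hi half that supplies the upper bits on 64-bit filesystems.
 */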
300ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
301 struct ext4_group_desc *bg)
bd81d8ee 302{
3a14589c 303 return le32_to_cpu(bg->bg_block_bitmap_lo) |
8fadc143 304 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 305 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
306}
307
308ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
309 struct ext4_group_desc *bg)
bd81d8ee 310{
5272f837 311 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
8fadc143 312 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 313 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
314}
315
316ext4_fsblk_t ext4_inode_table(struct super_block *sb,
317 struct ext4_group_desc *bg)
bd81d8ee 318{
5272f837 319 return le32_to_cpu(bg->bg_inode_table_lo) |
8fadc143 320 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 321 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
322}
323
324__u32 ext4_free_group_clusters(struct super_block *sb,
325 struct ext4_group_desc *bg)
326{
327 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
328 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 329 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
330}
331
332__u32 ext4_free_inodes_count(struct super_block *sb,
333 struct ext4_group_desc *bg)
334{
335 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
336 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 337 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
338}
339
340__u32 ext4_used_dirs_count(struct super_block *sb,
341 struct ext4_group_desc *bg)
342{
343 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
344 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 345 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
346}
347
348__u32 ext4_itable_unused_count(struct super_block *sb,
349 struct ext4_group_desc *bg)
350{
351 return le16_to_cpu(bg->bg_itable_unused_lo) |
352 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
0b8e58a1 353 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
354}
355
356void ext4_block_bitmap_set(struct super_block *sb,
357 struct ext4_group_desc *bg, ext4_fsblk_t blk)
bd81d8ee 358{
3a14589c 359 bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
360 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
361 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
362}
363
364void ext4_inode_bitmap_set(struct super_block *sb,
365 struct ext4_group_desc *bg, ext4_fsblk_t blk)
bd81d8ee 366{
5272f837 367 bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
368 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
369 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
370}
371
372void ext4_inode_table_set(struct super_block *sb,
373 struct ext4_group_desc *bg, ext4_fsblk_t blk)
bd81d8ee 374{
5272f837 375 bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
376 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
377 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
378}
379
380void ext4_free_group_clusters_set(struct super_block *sb,
381 struct ext4_group_desc *bg, __u32 count)
382{
383 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
384 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
385 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
386}
387
388void ext4_free_inodes_set(struct super_block *sb,
389 struct ext4_group_desc *bg, __u32 count)
390{
391 bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
392 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
393 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
394}
395
396void ext4_used_dirs_set(struct super_block *sb,
397 struct ext4_group_desc *bg, __u32 count)
398{
399 bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
400 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
401 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
402}
403
404void ext4_itable_unused_set(struct super_block *sb,
405 struct ext4_group_desc *bg, __u32 count)
406{
407 bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
408 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
409 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
410}
411
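/*
 * Superblock timestamps are stored as 40-bit values: the low 32 bits live in a
 * __le32 field and the top 8 bits in a separate *_hi byte, so values are
 * clamped to (1 << 40) - 1 before being split.
 */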
c92dc856 412static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
6a0678a7 413{
414 now = clamp_val(now, 0, (1ull << 40) - 1);
415
416 *lo = cpu_to_le32(lower_32_bits(now));
417 *hi = upper_32_bits(now);
418}
419
420static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
421{
422 return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
423}
424#define ext4_update_tstamp(es, tstamp) \
425 __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
426 ktime_get_real_seconds())
427#define ext4_get_tstamp(es, tstamp) \
428 __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
d3d1faf6 429
430/*
431 * The del_gendisk() function uninitializes the disk-specific data
432 * structures, including the bdi structure, without telling anyone
433 * else. Once this happens, any attempt to call mark_buffer_dirty()
434 * (for example, by ext4_commit_super), will cause a kernel OOPS.
435 * This is a kludge to prevent these oops until we can put in a proper
436 * hook in del_gendisk() to inform the VFS and file system layers.
437 */
438static int block_device_ejected(struct super_block *sb)
439{
440 struct inode *bd_inode = sb->s_bdev->bd_inode;
441 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
442
443 return bdi->dev == NULL;
444}
445
446static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
447{
448 struct super_block *sb = journal->j_private;
449 struct ext4_sb_info *sbi = EXT4_SB(sb);
450 int error = is_journal_aborted(journal);
5d3ee208 451 struct ext4_journal_cb_entry *jce;
18aadd47 452
5d3ee208 453 BUG_ON(txn->t_state == T_FINISHED);
454
455 ext4_process_freed_data(sb, txn->t_tid);
456
18aadd47 457 spin_lock(&sbi->s_md_lock);
458 while (!list_empty(&txn->t_private_list)) {
459 jce = list_entry(txn->t_private_list.next,
460 struct ext4_journal_cb_entry, jce_list);
461 list_del_init(&jce->jce_list);
462 spin_unlock(&sbi->s_md_lock);
463 jce->jce_func(sb, jce, error);
464 spin_lock(&sbi->s_md_lock);
465 }
466 spin_unlock(&sbi->s_md_lock);
467}
1c13d5c0 468
469/*
470 * This writepage callback for write_cache_pages()
471 * takes care of a few cases after page cleaning.
472 *
473 * write_cache_pages() already checks for dirty pages
474 * and calls clear_page_dirty_for_io(), which we want,
475 * to write protect the pages.
476 *
477 * However, we may have to redirty a page (see below.)
478 */
479static int ext4_journalled_writepage_callback(struct page *page,
480 struct writeback_control *wbc,
481 void *data)
482{
483 transaction_t *transaction = (transaction_t *) data;
484 struct buffer_head *bh, *head;
485 struct journal_head *jh;
486
487 bh = head = page_buffers(page);
488 do {
489 /*
490 * We have to redirty a page in these cases:
491 * 1) If buffer is dirty, it means the page was dirty because it
492 * contains a buffer that needs checkpointing. So the dirty bit
493 * needs to be preserved so that checkpointing writes the buffer
494 * properly.
495 * 2) If buffer is not part of the committing transaction
496 * (we may have just accidentally come across this buffer because
497 * inode range tracking is not exact) or if the currently running
498 * transaction already contains this buffer as well, dirty bit
499 * needs to be preserved so that the buffer gets writeprotected
500 * properly on running transaction's commit.
501 */
502 jh = bh2jh(bh);
503 if (buffer_dirty(bh) ||
504 (jh && (jh->b_transaction != transaction ||
505 jh->b_next_transaction))) {
506 redirty_page_for_writepage(wbc, page);
507 goto out;
508 }
509 } while ((bh = bh->b_this_page) != head);
510
511out:
512 return AOP_WRITEPAGE_ACTIVATE;
513}
514
515static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
516{
517 struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
518 struct writeback_control wbc = {
519 .sync_mode = WB_SYNC_ALL,
520 .nr_to_write = LONG_MAX,
521 .range_start = jinode->i_dirty_start,
522 .range_end = jinode->i_dirty_end,
523 };
524
525 return write_cache_pages(mapping, &wbc,
526 ext4_journalled_writepage_callback,
527 jinode->i_transaction);
528}
529
530static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
531{
532 int ret;
533
534 if (ext4_should_journal_data(jinode->i_vfs_inode))
535 ret = ext4_journalled_submit_inode_data_buffers(jinode);
536 else
537 ret = jbd2_journal_submit_inode_data_buffers(jinode);
538
539 return ret;
540}
541
542static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
543{
544 int ret = 0;
545
546 if (!ext4_should_journal_data(jinode->i_vfs_inode))
547 ret = jbd2_journal_finish_inode_data_buffers(jinode);
548
549 return ret;
550}
551
552static bool system_going_down(void)
553{
554 return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
555 || system_state == SYSTEM_RESTART;
556}
557
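/*
 * Table translating kernel errno values into the EXT4_ERR_* codes recorded in
 * the superblock error fields; values with no entry map to EXT4_ERR_UNKNOWN in
 * ext4_errno_to_code() below.
 */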
558struct ext4_err_translation {
559 int code;
560 int errno;
561};
562
563#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
564
565static struct ext4_err_translation err_translation[] = {
566 EXT4_ERR_TRANSLATE(EIO),
567 EXT4_ERR_TRANSLATE(ENOMEM),
568 EXT4_ERR_TRANSLATE(EFSBADCRC),
569 EXT4_ERR_TRANSLATE(EFSCORRUPTED),
570 EXT4_ERR_TRANSLATE(ENOSPC),
571 EXT4_ERR_TRANSLATE(ENOKEY),
572 EXT4_ERR_TRANSLATE(EROFS),
573 EXT4_ERR_TRANSLATE(EFBIG),
574 EXT4_ERR_TRANSLATE(EEXIST),
575 EXT4_ERR_TRANSLATE(ERANGE),
576 EXT4_ERR_TRANSLATE(EOVERFLOW),
577 EXT4_ERR_TRANSLATE(EBUSY),
578 EXT4_ERR_TRANSLATE(ENOTDIR),
579 EXT4_ERR_TRANSLATE(ENOTEMPTY),
580 EXT4_ERR_TRANSLATE(ESHUTDOWN),
581 EXT4_ERR_TRANSLATE(EFAULT),
582};
583
584static int ext4_errno_to_code(int errno)
585{
586 int i;
587
588 for (i = 0; i < ARRAY_SIZE(err_translation); i++)
589 if (err_translation[i].errno == errno)
590 return err_translation[i].code;
591 return EXT4_ERR_UNKNOWN;
592}
593
594static void save_error_info(struct super_block *sb, int error,
595 __u32 ino, __u64 block,
596 const char *func, unsigned int line)
40676623 597{
c92dc856 598 struct ext4_sb_info *sbi = EXT4_SB(sb);
40676623 599
600 /* We default to EFSCORRUPTED error... */
601 if (error == 0)
602 error = EFSCORRUPTED;
603
604 spin_lock(&sbi->s_error_lock);
605 sbi->s_add_error_count++;
606 sbi->s_last_error_code = error;
607 sbi->s_last_error_line = line;
608 sbi->s_last_error_ino = ino;
609 sbi->s_last_error_block = block;
610 sbi->s_last_error_func = func;
611 sbi->s_last_error_time = ktime_get_real_seconds();
612 if (!sbi->s_first_error_time) {
613 sbi->s_first_error_code = error;
614 sbi->s_first_error_line = line;
615 sbi->s_first_error_ino = ino;
616 sbi->s_first_error_block = block;
617 sbi->s_first_error_func = func;
618 sbi->s_first_error_time = sbi->s_last_error_time;
619 }
620 spin_unlock(&sbi->s_error_lock);
621}
622
623/* Deal with the reporting of failure conditions on a filesystem such as
624 * inconsistencies detected or read IO failures.
625 *
626 * On ext2, we can store the error state of the filesystem in the
617ba13b 627 * superblock. That is not possible on ext4, because we may have other
628 * write ordering constraints on the superblock which prevent us from
629 * writing it out straight away; and given that the journal is about to
630 * be aborted, we can't rely on the current, or future, transactions to
631 * write out the superblock safely.
632 *
dab291af 633 * We'll just use the jbd2_journal_abort() error code to record an error in
d6b198bc 634 * the journal instead. On recovery, the journal will complain about
ac27a0ec 635 * that error until we've noted it down and cleared it.
636 *
637 * If force_ro is set, we unconditionally force the filesystem into an
638 * ABORT|READONLY state, unless the error response on the fs has been set to
639 * panic in which case we take the easy way out and panic immediately. This is
640 * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
641 * at a critical moment in log management.
ac27a0ec 642 */
643static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
644 __u32 ino, __u64 block,
645 const char *func, unsigned int line)
ac27a0ec 646{
b08070ec 647 journal_t *journal = EXT4_SB(sb)->s_journal;
2d01ddc8 648 bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
b08070ec 649
e789ca0c 650 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
651 if (test_opt(sb, WARN_ON_ERROR))
652 WARN_ON_ONCE(1);
653
654 if (!continue_fs && !sb_rdonly(sb)) {
655 ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
656 if (journal)
657 jbd2_journal_abort(journal, -EIO);
658 }
659
660 if (!bdev_read_only(sb->s_bdev)) {
e789ca0c 661 save_error_info(sb, error, ino, block, func, line);
662 /*
663 * In case the fs should keep running, we need to writeout
664 * superblock through the journal. Due to lock ordering
665 * constraints, it may not be safe to do it right here so we
666 * defer superblock flushing to a workqueue.
667 */
bb9464e0 668 if (continue_fs && journal)
669 schedule_work(&EXT4_SB(sb)->s_error_work);
670 else
671 ext4_commit_super(sb);
672 }
e789ca0c 673
674 /*
675 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
676 * could panic during 'reboot -f' as the underlying device got already
677 * disabled.
678 */
014c9caa 679 if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
617ba13b 680 panic("EXT4-fs (device %s): panic forced after error\n",
ac27a0ec 681 sb->s_id);
4327ba52 682 }
683
684 if (sb_rdonly(sb) || continue_fs)
685 return;
686
687 ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
688 /*
689 * Make sure updated value of ->s_mount_flags will be visible before
690 * ->s_flags update
691 */
692 smp_wmb();
693 sb->s_flags |= SB_RDONLY;
694}
695
696static void flush_stashed_error_work(struct work_struct *work)
697{
698 struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
699 s_error_work);
700 journal_t *journal = sbi->s_journal;
701 handle_t *handle;
c92dc856 702
703 /*
704 * If the journal is still running, we have to write out superblock
705 * through the journal to avoid collisions of other journalled sb
706 * updates.
707 *
708 * We use directly jbd2 functions here to avoid recursing back into
709 * ext4 error handling code during handling of previous errors.
710 */
711 if (!sb_rdonly(sbi->s_sb) && journal) {
558d6450 712 struct buffer_head *sbh = sbi->s_sbh;
713 handle = jbd2_journal_start(journal, 1);
714 if (IS_ERR(handle))
715 goto write_directly;
558d6450 716 if (jbd2_journal_get_write_access(handle, sbh)) {
717 jbd2_journal_stop(handle);
718 goto write_directly;
719 }
720 ext4_update_super(sbi->s_sb);
721 if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
722 ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
723 "superblock detected");
724 clear_buffer_write_io_error(sbh);
725 set_buffer_uptodate(sbh);
726 }
727
728 if (jbd2_journal_dirty_metadata(handle, sbh)) {
729 jbd2_journal_stop(handle);
730 goto write_directly;
731 }
732 jbd2_journal_stop(handle);
d578b994 733 ext4_notify_error_sysfs(sbi);
734 return;
735 }
736write_directly:
737 /*
738 * Write through journal failed. Write sb directly to get error info
739 * out and hope for the best.
740 */
4392fbc4 741 ext4_commit_super(sbi->s_sb);
d578b994 742 ext4_notify_error_sysfs(sbi);
743}
744
745#define ext4_error_ratelimit(sb) \
746 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
747 "EXT4-fs error")
748
12062ddd 749void __ext4_error(struct super_block *sb, const char *function,
014c9caa 750 unsigned int line, bool force_ro, int error, __u64 block,
54d3adbc 751 const char *fmt, ...)
ac27a0ec 752{
0ff2ea7d 753 struct va_format vaf;
754 va_list args;
755
756 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
757 return;
758
ccf0f32a 759 trace_ext4_error(sb, function, line);
760 if (ext4_error_ratelimit(sb)) {
761 va_start(args, fmt);
762 vaf.fmt = fmt;
763 vaf.va = &args;
764 printk(KERN_CRIT
765 "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
766 sb->s_id, function, line, current->comm, &vaf);
767 va_end(args);
768 }
769 fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
770
e789ca0c 771 ext4_handle_error(sb, force_ro, error, 0, block, function, line);
772}
773
e7c96e8e 774void __ext4_error_inode(struct inode *inode, const char *function,
54d3adbc 775 unsigned int line, ext4_fsblk_t block, int error,
e7c96e8e 776 const char *fmt, ...)
777{
778 va_list args;
f7c21177 779 struct va_format vaf;
273df556 780
781 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
782 return;
783
ccf0f32a 784 trace_ext4_error(inode->i_sb, function, line);
785 if (ext4_error_ratelimit(inode->i_sb)) {
786 va_start(args, fmt);
787 vaf.fmt = fmt;
788 vaf.va = &args;
789 if (block)
790 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
791 "inode #%lu: block %llu: comm %s: %pV\n",
792 inode->i_sb->s_id, function, line, inode->i_ino,
793 block, current->comm, &vaf);
794 else
795 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
796 "inode #%lu: comm %s: %pV\n",
797 inode->i_sb->s_id, function, line, inode->i_ino,
798 current->comm, &vaf);
799 va_end(args);
800 }
801 fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
802
803 ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
804 function, line);
805}
806
807void __ext4_error_file(struct file *file, const char *function,
808 unsigned int line, ext4_fsblk_t block,
809 const char *fmt, ...)
810{
811 va_list args;
f7c21177 812 struct va_format vaf;
496ad9aa 813 struct inode *inode = file_inode(file);
814 char pathname[80], *path;
815
816 if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
817 return;
818
ccf0f32a 819 trace_ext4_error(inode->i_sb, function, line);
efbed4dc 820 if (ext4_error_ratelimit(inode->i_sb)) {
9bf39ab2 821 path = file_path(file, pathname, sizeof(pathname));
822 if (IS_ERR(path))
823 path = "(unknown)";
824 va_start(args, fmt);
825 vaf.fmt = fmt;
826 vaf.va = &args;
827 if (block)
828 printk(KERN_CRIT
829 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
830 "block %llu: comm %s: path %s: %pV\n",
831 inode->i_sb->s_id, function, line, inode->i_ino,
832 block, current->comm, path, &vaf);
833 else
834 printk(KERN_CRIT
835 "EXT4-fs error (device %s): %s:%d: inode #%lu: "
836 "comm %s: path %s: %pV\n",
837 inode->i_sb->s_id, function, line, inode->i_ino,
838 current->comm, path, &vaf);
839 va_end(args);
840 }
841 fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
842
843 ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
844 function, line);
845}
846
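/*
 * Turn an errno from the journaling layer into a human-readable string for the
 * messages below; nbuf is caller-provided scratch space used for codes that
 * have no dedicated description.
 */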
847const char *ext4_decode_error(struct super_block *sb, int errno,
848 char nbuf[16])
849{
850 char *errstr = NULL;
851
852 switch (errno) {
853 case -EFSCORRUPTED:
854 errstr = "Corrupt filesystem";
855 break;
856 case -EFSBADCRC:
857 errstr = "Filesystem failed CRC";
858 break;
859 case -EIO:
860 errstr = "IO failure";
861 break;
862 case -ENOMEM:
863 errstr = "Out of memory";
864 break;
865 case -EROFS:
866 if (!sb || (EXT4_SB(sb)->s_journal &&
867 EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
868 errstr = "Journal has aborted";
869 else
870 errstr = "Readonly filesystem";
871 break;
872 default:
873 /* If the caller passed in an extra buffer for unknown
874 * errors, textualise them now. Else we just return
875 * NULL. */
876 if (nbuf) {
877 /* Check for truncated error codes... */
878 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
879 errstr = nbuf;
880 }
881 break;
882 }
883
884 return errstr;
885}
886
617ba13b 887/* __ext4_std_error decodes expected errors from journaling functions
888 * automatically and invokes the appropriate error response. */
889
890void __ext4_std_error(struct super_block *sb, const char *function,
891 unsigned int line, int errno)
892{
893 char nbuf[16];
894 const char *errstr;
895
896 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
897 return;
898
899 /* Special case: if the error is EROFS, and we're not already
900 * inside a transaction, then there's really no point in logging
901 * an error. */
bc98a42c 902 if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
903 return;
904
905 if (ext4_error_ratelimit(sb)) {
906 errstr = ext4_decode_error(sb, errno, nbuf);
907 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
908 sb->s_id, function, line, errstr);
909 }
9a089b21 910 fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
ac27a0ec 911
e789ca0c 912 ext4_handle_error(sb, false, -errno, 0, 0, function, line);
913}
914
915void __ext4_msg(struct super_block *sb,
916 const char *prefix, const char *fmt, ...)
b31e1552 917{
0ff2ea7d 918 struct va_format vaf;
919 va_list args;
920
921 if (sb) {
922 atomic_inc(&EXT4_SB(sb)->s_msg_count);
923 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
924 "EXT4-fs"))
925 return;
926 }
efbed4dc 927
b31e1552 928 va_start(args, fmt);
929 vaf.fmt = fmt;
930 vaf.va = &args;
931 if (sb)
932 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
933 else
934 printk("%sEXT4-fs: %pV\n", prefix, &vaf);
935 va_end(args);
936}
937
938static int ext4_warning_ratelimit(struct super_block *sb)
939{
940 atomic_inc(&EXT4_SB(sb)->s_warning_count);
941 return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
942 "EXT4-fs warning");
943}
b03a2f7e 944
12062ddd 945void __ext4_warning(struct super_block *sb, const char *function,
c398eda0 946 unsigned int line, const char *fmt, ...)
ac27a0ec 947{
0ff2ea7d 948 struct va_format vaf;
949 va_list args;
950
b03a2f7e 951 if (!ext4_warning_ratelimit(sb))
952 return;
953
ac27a0ec 954 va_start(args, fmt);
955 vaf.fmt = fmt;
956 vaf.va = &args;
957 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
958 sb->s_id, function, line, &vaf);
959 va_end(args);
960}
961
962void __ext4_warning_inode(const struct inode *inode, const char *function,
963 unsigned int line, const char *fmt, ...)
964{
965 struct va_format vaf;
966 va_list args;
967
968 if (!ext4_warning_ratelimit(inode->i_sb))
969 return;
970
971 va_start(args, fmt);
972 vaf.fmt = fmt;
973 vaf.va = &args;
974 printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
975 "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
976 function, line, inode->i_ino, current->comm, &vaf);
977 va_end(args);
978}
979
980void __ext4_grp_locked_error(const char *function, unsigned int line,
981 struct super_block *sb, ext4_group_t grp,
982 unsigned long ino, ext4_fsblk_t block,
983 const char *fmt, ...)
984__releases(bitlock)
985__acquires(bitlock)
986{
0ff2ea7d 987 struct va_format vaf;
5d1b1b3f 988 va_list args;
5d1b1b3f 989
990 if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
991 return;
992
ccf0f32a 993 trace_ext4_error(sb, function, line);
994 if (ext4_error_ratelimit(sb)) {
995 va_start(args, fmt);
996 vaf.fmt = fmt;
997 vaf.va = &args;
998 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
999 sb->s_id, function, line, grp);
1000 if (ino)
1001 printk(KERN_CONT "inode %lu: ", ino);
1002 if (block)
1003 printk(KERN_CONT "block %llu:",
1004 (unsigned long long) block);
1005 printk(KERN_CONT "%pV\n", &vaf);
1006 va_end(args);
1007 }
1008
1009 if (test_opt(sb, ERRORS_CONT)) {
1010 if (test_opt(sb, WARN_ON_ERROR))
1011 WARN_ON_ONCE(1);
e789ca0c 1012 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
1013 if (!bdev_read_only(sb->s_bdev)) {
1014 save_error_info(sb, EFSCORRUPTED, ino, block, function,
1015 line);
e789ca0c 1016 schedule_work(&EXT4_SB(sb)->s_error_work);
2d01ddc8 1017 }
1018 return;
1019 }
1020 ext4_unlock_group(sb, grp);
e789ca0c 1021 ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
1022 /*
1023 * We only get here in the ERRORS_RO case; relocking the group
1024 * may be dangerous, but nothing bad will happen since the
1025 * filesystem will have already been marked read/only and the
1026 * journal has been aborted. We return 1 as a hint to callers
1027 * who might what to use the return value from
25985edc 1028 * ext4_grp_locked_error() to distinguish between the
1029 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
1030 * aggressively from the ext4 function in question, with a
1031 * more appropriate error code.
1032 */
1033 ext4_lock_group(sb, grp);
1034 return;
1035}
1036
1037void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
1038 ext4_group_t group,
1039 unsigned int flags)
1040{
1041 struct ext4_sb_info *sbi = EXT4_SB(sb);
1042 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1043 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
1044 int ret;
1045
1046 if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
1047 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1048 &grp->bb_state);
1049 if (!ret)
1050 percpu_counter_sub(&sbi->s_freeclusters_counter,
1051 grp->bb_free);
1052 }
1053
1054 if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
1055 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
1056 &grp->bb_state);
1057 if (!ret && gdp) {
1058 int count;
1059
1060 count = ext4_free_inodes_count(sb, gdp);
1061 percpu_counter_sub(&sbi->s_freeinodes_counter,
1062 count);
1063 }
1064 }
1065}
1066
617ba13b 1067void ext4_update_dynamic_rev(struct super_block *sb)
ac27a0ec 1068{
617ba13b 1069 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ac27a0ec 1070
617ba13b 1071 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
1072 return;
1073
12062ddd 1074 ext4_warning(sb,
1075 "updating to rev %d because of new feature flag, "
1076 "running e2fsck is recommended",
617ba13b 1077 EXT4_DYNAMIC_REV);
ac27a0ec 1078
1079 es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1080 es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1081 es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
1082 /* leave es->s_feature_*compat flags alone */
1083 /* es->s_uuid will be set by e2fsck if empty */
1084
1085 /*
1086 * The rest of the superblock fields should be zero, and if not it
1087 * means they are likely already in use, so leave them alone. We
1088 * can leave it up to e2fsck to clean up any inconsistencies there.
1089 */
1090}
1091
1092/*
1093 * Open the external journal device
1094 */
b31e1552 1095static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
1096{
1097 struct block_device *bdev;
ac27a0ec 1098
d4d77629 1099 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
1100 if (IS_ERR(bdev))
1101 goto fail;
1102 return bdev;
1103
1104fail:
1105 ext4_msg(sb, KERN_ERR,
1106 "failed to open journal device unknown-block(%u,%u) %ld",
1107 MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
1108 return NULL;
1109}
1110
1111/*
1112 * Release the journal device
1113 */
4385bab1 1114static void ext4_blkdev_put(struct block_device *bdev)
ac27a0ec 1115{
4385bab1 1116 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1117}
1118
4385bab1 1119static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
1120{
1121 struct block_device *bdev;
ee7ed3aa 1122 bdev = sbi->s_journal_bdev;
ac27a0ec 1123 if (bdev) {
4385bab1 1124 ext4_blkdev_put(bdev);
ee7ed3aa 1125 sbi->s_journal_bdev = NULL;
ac27a0ec 1126 }
1127}
1128
1129static inline struct inode *orphan_list_entry(struct list_head *l)
1130{
617ba13b 1131 return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
1132}
1133
617ba13b 1134static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
1135{
1136 struct list_head *l;
1137
1138 ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1139 le32_to_cpu(sbi->s_es->s_last_orphan));
1140
1141 printk(KERN_ERR "sb_info orphan list:\n");
1142 list_for_each(l, &sbi->s_orphan) {
1143 struct inode *inode = orphan_list_entry(l);
1144 printk(KERN_ERR " "
1145 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1146 inode->i_sb->s_id, inode->i_ino, inode,
1147 inode->i_mode, inode->i_nlink,
1148 NEXT_ORPHAN(inode));
1149 }
1150}
1151
1152#ifdef CONFIG_QUOTA
1153static int ext4_quota_off(struct super_block *sb, int type);
1154
1155static inline void ext4_quota_off_umount(struct super_block *sb)
1156{
1157 int type;
1158
1159 /* Use our quota_off function to clear inode flags etc. */
1160 for (type = 0; type < EXT4_MAXQUOTAS; type++)
1161 ext4_quota_off(sb, type);
957153fc 1162}
1163
1164/*
1165 * This is a helper function which is used in the mount/remount
1166 * codepaths (which holds s_umount) to fetch the quota file name.
1167 */
1168static inline char *get_qf_name(struct super_block *sb,
1169 struct ext4_sb_info *sbi,
1170 int type)
1171{
1172 return rcu_dereference_protected(sbi->s_qf_names[type],
1173 lockdep_is_held(&sb->s_umount));
1174}
1175#else
1176static inline void ext4_quota_off_umount(struct super_block *sb)
1177{
1178}
1179#endif
1180
2b2d6d01 1181static void ext4_put_super(struct super_block *sb)
ac27a0ec 1182{
1183 struct ext4_sb_info *sbi = EXT4_SB(sb);
1184 struct ext4_super_block *es = sbi->s_es;
1d0c3924 1185 struct buffer_head **group_desc;
7c990728 1186 struct flex_groups **flex_groups;
97abd7d4 1187 int aborted = 0;
ef2cabf7 1188 int i, err;
ac27a0ec 1189
857ac889 1190 ext4_unregister_li_request(sb);
957153fc 1191 ext4_quota_off_umount(sb);
e0ccfd95 1192
c92dc856 1193 flush_work(&sbi->s_error_work);
2e8fa54e 1194 destroy_workqueue(sbi->rsv_conversion_wq);
02f310fc 1195 ext4_release_orphan_info(sb);
4c0425ff 1196
1197 /*
1198 * Unregister sysfs before destroying jbd2 journal.
1199 * Since we could still access attr_journal_task attribute via sysfs
1200 * path which could have sbi->s_journal->j_task as NULL
1201 */
1202 ext4_unregister_sysfs(sb);
1203
0390131b 1204 if (sbi->s_journal) {
97abd7d4 1205 aborted = is_journal_aborted(sbi->s_journal);
1206 err = jbd2_journal_destroy(sbi->s_journal);
1207 sbi->s_journal = NULL;
878520ac 1208 if ((err < 0) && !aborted) {
54d3adbc 1209 ext4_abort(sb, -err, "Couldn't clean up the journal");
878520ac 1210 }
0390131b 1211 }
d4edac31 1212
d3922a77 1213 ext4_es_unregister_shrinker(sbi);
9105bb14 1214 del_timer_sync(&sbi->s_err_report);
1215 ext4_release_system_zone(sb);
1216 ext4_mb_release(sb);
1217 ext4_ext_release(sb);
d4edac31 1218
bc98a42c 1219 if (!sb_rdonly(sb) && !aborted) {
e2b911c5 1220 ext4_clear_feature_journal_needs_recovery(sb);
02f310fc 1221 ext4_clear_feature_orphan_present(sb);
ac27a0ec 1222 es->s_state = cpu_to_le16(sbi->s_mount_state);
ac27a0ec 1223 }
bc98a42c 1224 if (!sb_rdonly(sb))
4392fbc4 1225 ext4_commit_super(sb);
a8e25a83 1226
1227 rcu_read_lock();
1228 group_desc = rcu_dereference(sbi->s_group_desc);
ac27a0ec 1229 for (i = 0; i < sbi->s_gdb_count; i++)
1230 brelse(group_desc[i]);
1231 kvfree(group_desc);
1232 flex_groups = rcu_dereference(sbi->s_flex_groups);
1233 if (flex_groups) {
1234 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1235 kvfree(flex_groups[i]);
1236 kvfree(flex_groups);
1237 }
1d0c3924 1238 rcu_read_unlock();
57042651 1239 percpu_counter_destroy(&sbi->s_freeclusters_counter);
1240 percpu_counter_destroy(&sbi->s_freeinodes_counter);
1241 percpu_counter_destroy(&sbi->s_dirs_counter);
57042651 1242 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
efc61345 1243 percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
bbd55937 1244 percpu_free_rwsem(&sbi->s_writepages_rwsem);
ac27a0ec 1245#ifdef CONFIG_QUOTA
a2d4a646 1246 for (i = 0; i < EXT4_MAXQUOTAS; i++)
33458eab 1247 kfree(get_qf_name(sb, sbi, i));
1248#endif
1249
1250 /* Debugging code just in case the in-memory inode orphan list
1251 * isn't empty. The on-disk one can be non-empty if we've
1252 * detected an error and taken the fs readonly, but the
1253 * in-memory list had better be clean by this point. */
1254 if (!list_empty(&sbi->s_orphan))
1255 dump_orphan_list(sb, sbi);
837c23fb 1256 ASSERT(list_empty(&sbi->s_orphan));
ac27a0ec 1257
89d96a6f 1258 sync_blockdev(sb->s_bdev);
f98393a6 1259 invalidate_bdev(sb->s_bdev);
ee7ed3aa 1260 if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
1261 /*
1262 * Invalidate the journal device's buffers. We don't want them
1263 * floating about in memory - the physical journal device may
1264 * hotswapped, and it breaks the `ro-after' testing code.
1265 */
1266 sync_blockdev(sbi->s_journal_bdev);
1267 invalidate_bdev(sbi->s_journal_bdev);
617ba13b 1268 ext4_blkdev_remove(sbi);
ac27a0ec 1269 }
1270
1271 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1272 sbi->s_ea_inode_cache = NULL;
1273
1274 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1275 sbi->s_ea_block_cache = NULL;
1276
1277 ext4_stop_mmpd(sbi);
1278
9060dd2c 1279 brelse(sbi->s_sbh);
ac27a0ec 1280 sb->s_fs_info = NULL;
1281 /*
1282 * Now that we are completely done shutting down the
1283 * superblock, we need to actually destroy the kobject.
1284 */
1285 kobject_put(&sbi->s_kobj);
1286 wait_for_completion(&sbi->s_kobj_unregister);
1287 if (sbi->s_chksum_driver)
1288 crypto_free_shash(sbi->s_chksum_driver);
705895b6 1289 kfree(sbi->s_blockgroup_lock);
5e405595 1290 fs_put_dax(sbi->s_daxdev);
ac4acb1f 1291 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
c83ad55e 1292#ifdef CONFIG_UNICODE
f8f4acb6 1293 utf8_unload(sb->s_encoding);
c83ad55e 1294#endif
ac27a0ec 1295 kfree(sbi);
1296}
1297
e18b890b 1298static struct kmem_cache *ext4_inode_cachep;
1299
1300/*
1301 * Called inside transaction, so use GFP_NOFS
1302 */
617ba13b 1303static struct inode *ext4_alloc_inode(struct super_block *sb)
ac27a0ec 1304{
617ba13b 1305 struct ext4_inode_info *ei;
ac27a0ec 1306
e6b4f8da 1307 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
1308 if (!ei)
1309 return NULL;
0b8e58a1 1310
ee73f9a5 1311 inode_set_iversion(&ei->vfs_inode, 1);
202ee5df 1312 spin_lock_init(&ei->i_raw_lock);
c9de560d 1313 INIT_LIST_HEAD(&ei->i_prealloc_list);
27bc446e 1314 atomic_set(&ei->i_prealloc_active, 0);
c9de560d 1315 spin_lock_init(&ei->i_prealloc_lock);
1316 ext4_es_init_tree(&ei->i_es_tree);
1317 rwlock_init(&ei->i_es_lock);
edaa53ca 1318 INIT_LIST_HEAD(&ei->i_es_list);
eb68d0e2 1319 ei->i_es_all_nr = 0;
edaa53ca 1320 ei->i_es_shk_nr = 0;
dd475925 1321 ei->i_es_shrink_lblk = 0;
d2a17637 1322 ei->i_reserved_data_blocks = 0;
d2a17637 1323 spin_lock_init(&(ei->i_block_reservation_lock));
1dc0aa46 1324 ext4_init_pending_tree(&ei->i_pending_tree);
1325#ifdef CONFIG_QUOTA
1326 ei->i_reserved_quota = 0;
96c7e0d9 1327 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
a9e7f447 1328#endif
8aefcd55 1329 ei->jinode = NULL;
2e8fa54e 1330 INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
744692dc 1331 spin_lock_init(&ei->i_completed_io_lock);
1332 ei->i_sync_tid = 0;
1333 ei->i_datasync_tid = 0;
e27f41e1 1334 atomic_set(&ei->i_unwritten, 0);
2e8fa54e 1335 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1336 ext4_fc_init_inode(&ei->vfs_inode);
1337 mutex_init(&ei->i_fc_lock);
1338 return &ei->vfs_inode;
1339}
1340
1341static int ext4_drop_inode(struct inode *inode)
1342{
1343 int drop = generic_drop_inode(inode);
1344
1345 if (!drop)
1346 drop = fscrypt_drop_inode(inode);
1347
1348 trace_ext4_drop_inode(inode, drop);
1349 return drop;
1350}
1351
94053139 1352static void ext4_free_in_core_inode(struct inode *inode)
fa0d7e3d 1353{
2c58d548 1354 fscrypt_free_inode(inode);
1355 if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1356 pr_warn("%s: inode %ld still in fc list",
1357 __func__, inode->i_ino);
1358 }
1359 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1360}
1361
617ba13b 1362static void ext4_destroy_inode(struct inode *inode)
ac27a0ec 1363{
9f7dd93d 1364 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1365 ext4_msg(inode->i_sb, KERN_ERR,
1366 "Inode %lu (%p): orphan list check failed!",
1367 inode->i_ino, EXT4_I(inode));
1368 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1369 EXT4_I(inode), sizeof(struct ext4_inode_info),
1370 true);
1371 dump_stack();
1372 }
1373
1374 if (EXT4_I(inode)->i_reserved_data_blocks)
1375 ext4_msg(inode->i_sb, KERN_ERR,
1376 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1377 inode->i_ino, EXT4_I(inode),
1378 EXT4_I(inode)->i_reserved_data_blocks);
1379}
1380
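/*
 * Slab constructor: runs once when an ext4_inode_info object is first created
 * by the allocator (not on every allocation), so it only initializes state
 * that persists across alloc/free cycles of the object.
 */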
51cc5068 1381static void init_once(void *foo)
ac27a0ec 1382{
617ba13b 1383 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
ac27a0ec 1384
a35afb83 1385 INIT_LIST_HEAD(&ei->i_orphan);
a35afb83 1386 init_rwsem(&ei->xattr_sem);
0e855ac8 1387 init_rwsem(&ei->i_data_sem);
a35afb83 1388 inode_init_once(&ei->vfs_inode);
aa75f4d3 1389 ext4_fc_init_inode(&ei->vfs_inode);
1390}
1391
e67bc2b3 1392static int __init init_inodecache(void)
ac27a0ec 1393{
1394 ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1395 sizeof(struct ext4_inode_info), 0,
1396 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1397 SLAB_ACCOUNT),
1398 offsetof(struct ext4_inode_info, i_data),
1399 sizeof_field(struct ext4_inode_info, i_data),
1400 init_once);
617ba13b 1401 if (ext4_inode_cachep == NULL)
1402 return -ENOMEM;
1403 return 0;
1404}
1405
1406static void destroy_inodecache(void)
1407{
1408 /*
1409 * Make sure all delayed rcu free inodes are flushed before we
1410 * destroy cache.
1411 */
1412 rcu_barrier();
617ba13b 1413 kmem_cache_destroy(ext4_inode_cachep);
1414}
1415
0930fcc1 1416void ext4_clear_inode(struct inode *inode)
ac27a0ec 1417{
aa75f4d3 1418 ext4_fc_del(inode);
0930fcc1 1419 invalidate_inode_buffers(inode);
dbd5768f 1420 clear_inode(inode);
27bc446e 1421 ext4_discard_preallocations(inode, 0);
51865fda 1422 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
f4c2d372 1423 dquot_drop(inode);
1424 if (EXT4_I(inode)->jinode) {
1425 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1426 EXT4_I(inode)->jinode);
1427 jbd2_free_inode(EXT4_I(inode)->jinode);
1428 EXT4_I(inode)->jinode = NULL;
1429 }
3d204e24 1430 fscrypt_put_encryption_info(inode);
c93d8f88 1431 fsverity_cleanup_inode(inode);
1432}
1433
1b961ac0 1434static struct inode *ext4_nfs_get_inode(struct super_block *sb,
0b8e58a1 1435 u64 ino, u32 generation)
ac27a0ec 1436{
ac27a0ec 1437 struct inode *inode;
ac27a0ec 1438
8a363970 1439 /*
1440 * Currently we don't know the generation for parent directory, so
1441 * a generation of 0 means "accept any"
1442 */
8a363970 1443 inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1444 if (IS_ERR(inode))
1445 return ERR_CAST(inode);
1446 if (generation && inode->i_generation != generation) {
1447 iput(inode);
1448 return ERR_PTR(-ESTALE);
1449 }
1450
1451 return inode;
1452}
1453
1454static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
0b8e58a1 1455 int fh_len, int fh_type)
1456{
1457 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1458 ext4_nfs_get_inode);
1459}
1460
1461static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
0b8e58a1 1462 int fh_len, int fh_type)
1463{
1464 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1465 ext4_nfs_get_inode);
1466}
1467
1468static int ext4_nfs_commit_metadata(struct inode *inode)
1469{
1470 struct writeback_control wbc = {
1471 .sync_mode = WB_SYNC_ALL
1472 };
1473
1474 trace_ext4_nfs_commit_metadata(inode);
1475 return ext4_write_inode(inode, &wbc);
1476}
1477
643fa961 1478#ifdef CONFIG_FS_ENCRYPTION
1479static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
1480{
1481 return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
1482 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
1483}
1484
1485static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
1486 void *fs_data)
1487{
2f8f5e76 1488 handle_t *handle = fs_data;
c1a5d5f6 1489 int res, res2, credits, retries = 0;
2f8f5e76 1490
1491 /*
1492 * Encrypting the root directory is not allowed because e2fsck expects
1493 * lost+found to exist and be unencrypted, and encrypting the root
1494 * directory would imply encrypting the lost+found directory as well as
1495 * the filename "lost+found" itself.
1496 */
1497 if (inode->i_ino == EXT4_ROOT_INO)
1498 return -EPERM;
2f8f5e76 1499
1500 if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
1501 return -EINVAL;
1502
1503 if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
1504 return -EOPNOTSUPP;
1505
1506 res = ext4_convert_inline_data(inode);
1507 if (res)
1508 return res;
1509
1510 /*
1511 * If a journal handle was specified, then the encryption context is
1512 * being set on a new inode via inheritance and is part of a larger
1513 * transaction to create the inode. Otherwise the encryption context is
1514 * being set on an existing inode in its own transaction. Only in the
1515 * latter case should the "retry on ENOSPC" logic be used.
1516 */
a7550b30 1517
1518 if (handle) {
1519 res = ext4_xattr_set_handle(handle, inode,
1520 EXT4_XATTR_INDEX_ENCRYPTION,
1521 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1522 ctx, len, 0);
1523 if (!res) {
1524 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1525 ext4_clear_inode_state(inode,
1526 EXT4_STATE_MAY_INLINE_DATA);
a3caa24b 1527 /*
1528 * Update inode->i_flags - S_ENCRYPTED will be enabled,
1529 * S_DAX may be disabled
a3caa24b 1530 */
043546e4 1531 ext4_set_inode_flags(inode, false);
1532 }
1533 return res;
1534 }
1535
1536 res = dquot_initialize(inode);
1537 if (res)
1538 return res;
2f8f5e76 1539retry:
1540 res = ext4_xattr_set_credits(inode, len, false /* is_create */,
1541 &credits);
1542 if (res)
1543 return res;
1544
c1a5d5f6 1545 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
1546 if (IS_ERR(handle))
1547 return PTR_ERR(handle);
1548
1549 res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
1550 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
1551 ctx, len, 0);
1552 if (!res) {
1553 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1554 /*
1555 * Update inode->i_flags - S_ENCRYPTED will be enabled,
1556 * S_DAX may be disabled
1557 */
043546e4 1558 ext4_set_inode_flags(inode, false);
1559 res = ext4_mark_inode_dirty(handle, inode);
1560 if (res)
1561 EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
1562 }
1563 res2 = ext4_journal_stop(handle);
1564
1565 if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
1566 goto retry;
1567 if (!res)
1568 res = res2;
1569 return res;
1570}
1571
ac4acb1f 1572static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb)
a7550b30 1573{
ac4acb1f 1574 return EXT4_SB(sb)->s_dummy_enc_policy.policy;
1575}
1576
1577static bool ext4_has_stable_inodes(struct super_block *sb)
1578{
1579 return ext4_has_feature_stable_inodes(sb);
1580}
1581
1582static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
1583 int *ino_bits_ret, int *lblk_bits_ret)
1584{
1585 *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
1586 *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
1587}
1588
6f69f0ed 1589static const struct fscrypt_operations ext4_cryptops = {
a5d431ef 1590 .key_prefix = "ext4:",
a7550b30 1591 .get_context = ext4_get_context,
a7550b30 1592 .set_context = ext4_set_context,
ac4acb1f 1593 .get_dummy_policy = ext4_get_dummy_policy,
a7550b30 1594 .empty_dir = ext4_empty_dir,
1595 .has_stable_inodes = ext4_has_stable_inodes,
1596 .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
a7550b30 1597};
1598#endif
1599
ac27a0ec 1600#ifdef CONFIG_QUOTA
d6006186 1601static const char * const quotatypes[] = INITQFNAMES;
689c958c 1602#define QTYPE2NAME(t) (quotatypes[t])
ac27a0ec 1603
1604static int ext4_write_dquot(struct dquot *dquot);
1605static int ext4_acquire_dquot(struct dquot *dquot);
1606static int ext4_release_dquot(struct dquot *dquot);
1607static int ext4_mark_dquot_dirty(struct dquot *dquot);
1608static int ext4_write_info(struct super_block *sb, int type);
6f28e087 1609static int ext4_quota_on(struct super_block *sb, int type, int format_id,
8c54ca9c 1610 const struct path *path);
617ba13b 1611static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
ac27a0ec 1612 size_t len, loff_t off);
617ba13b 1613static ssize_t ext4_quota_write(struct super_block *sb, int type,
ac27a0ec 1614 const char *data, size_t len, loff_t off);
1615static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1616 unsigned int flags);
ac27a0ec 1617
1618static struct dquot **ext4_get_dquots(struct inode *inode)
1619{
1620 return EXT4_I(inode)->i_dquot;
1621}
1622
61e225dc 1623static const struct dquot_operations ext4_quota_operations = {
1624 .get_reserved_space = ext4_get_reserved_space,
1625 .write_dquot = ext4_write_dquot,
1626 .acquire_dquot = ext4_acquire_dquot,
1627 .release_dquot = ext4_release_dquot,
1628 .mark_dirty = ext4_mark_dquot_dirty,
1629 .write_info = ext4_write_info,
1630 .alloc_dquot = dquot_alloc,
1631 .destroy_dquot = dquot_destroy,
1632 .get_projid = ext4_get_projid,
1633 .get_inode_usage = ext4_get_inode_usage,
ebc11f7b 1634 .get_next_id = dquot_get_next_id,
1635};
1636
0d54b217 1637static const struct quotactl_ops ext4_qctl_operations = {
617ba13b 1638 .quota_on = ext4_quota_on,
ca0e05e4 1639 .quota_off = ext4_quota_off,
287a8095 1640 .quota_sync = dquot_quota_sync,
0a240339 1641 .get_state = dquot_get_state,
287a8095
CH
1642 .set_info = dquot_set_dqinfo,
1643 .get_dqblk = dquot_get_dqblk,
6332b9b5
ES
1644 .set_dqblk = dquot_set_dqblk,
1645 .get_nextdqblk = dquot_get_next_dqblk,
ac27a0ec
DK
1646};
1647#endif
1648
ee9b6d61 1649static const struct super_operations ext4_sops = {
617ba13b 1650 .alloc_inode = ext4_alloc_inode,
94053139 1651 .free_inode = ext4_free_in_core_inode,
617ba13b 1652 .destroy_inode = ext4_destroy_inode,
617ba13b
MC
1653 .write_inode = ext4_write_inode,
1654 .dirty_inode = ext4_dirty_inode,
7ff9c073 1655 .drop_inode = ext4_drop_inode,
0930fcc1 1656 .evict_inode = ext4_evict_inode,
617ba13b 1657 .put_super = ext4_put_super,
617ba13b 1658 .sync_fs = ext4_sync_fs,
c4be0c1d
TS
1659 .freeze_fs = ext4_freeze,
1660 .unfreeze_fs = ext4_unfreeze,
617ba13b
MC
1661 .statfs = ext4_statfs,
1662 .remount_fs = ext4_remount,
617ba13b 1663 .show_options = ext4_show_options,
ac27a0ec 1664#ifdef CONFIG_QUOTA
617ba13b
MC
1665 .quota_read = ext4_quota_read,
1666 .quota_write = ext4_quota_write,
96c7e0d9 1667 .get_dquots = ext4_get_dquots,
ac27a0ec
DK
1668#endif
1669};
1670
39655164 1671static const struct export_operations ext4_export_ops = {
1b961ac0
CH
1672 .fh_to_dentry = ext4_fh_to_dentry,
1673 .fh_to_parent = ext4_fh_to_parent,
617ba13b 1674 .get_parent = ext4_get_parent,
fde87268 1675 .commit_metadata = ext4_nfs_commit_metadata,
ac27a0ec
DK
1676};
1677
1678enum {
1679 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1680 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
72578c33 1681 Opt_nouid32, Opt_debug, Opt_removed,
ac27a0ec 1682 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
72578c33 1683 Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
ad4eec61
ES
1684 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1685 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
ac27a0ec 1686 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
6ddb2447 1687 Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
4f74d15f 1688 Opt_inlinecrypt,
ac27a0ec 1689 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
5a20bdfc 1690 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
ee4a3fcd 1691 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
9cb20f94
IW
1692 Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
1693 Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
327eaf73
TT
1694 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
1695 Opt_nowarn_on_error, Opt_mblk_io_submit,
670e9875 1696 Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
1449032b 1697 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
5328e635 1698 Opt_inode_readahead_blks, Opt_journal_ioprio,
744692dc 1699 Opt_dioread_nolock, Opt_dioread_lock,
fc6cb1cd 1700 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
cdb7ee4c 1701 Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
21175ca4 1702 Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
e5a185c2 1703 Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
8016e29f 1704#ifdef CONFIG_EXT4_DEBUG
99c880de 1705 Opt_fc_debug_max_replay, Opt_fc_debug_force
8016e29f 1706#endif
ac27a0ec
DK
1707};
1708
e5a185c2
LC
1709static const struct constant_table ext4_param_errors[] = {
1710 {"continue", Opt_err_cont},
1711 {"panic", Opt_err_panic},
1712 {"remount-ro", Opt_err_ro},
1713 {}
1714};
1715
1716static const struct constant_table ext4_param_data[] = {
1717 {"journal", Opt_data_journal},
1718 {"ordered", Opt_data_ordered},
1719 {"writeback", Opt_data_writeback},
1720 {}
1721};
1722
1723static const struct constant_table ext4_param_data_err[] = {
1724 {"abort", Opt_data_err_abort},
1725 {"ignore", Opt_data_err_ignore},
1726 {}
1727};
1728
1729static const struct constant_table ext4_param_jqfmt[] = {
1730 {"vfsold", Opt_jqfmt_vfsold},
1731 {"vfsv0", Opt_jqfmt_vfsv0},
1732 {"vfsv1", Opt_jqfmt_vfsv1},
1733 {}
1734};
1735
1736static const struct constant_table ext4_param_dax[] = {
1737 {"always", Opt_dax_always},
1738 {"inode", Opt_dax_inode},
1739 {"never", Opt_dax_never},
1740 {}
1741};
1742
1743/* String parameter that allows empty argument */
1744#define fsparam_string_empty(NAME, OPT) \
1745 __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
1746
1747/*
1748 * Mount option specification
1749 * We don't use fsparam_flag_no because of the way we set the
1750 * options and the way we show them in _ext4_show_options(). To
1751 * keep the changes to a minimum, let's keep the negative options
1752 * separate for now.
1753 */
1754static const struct fs_parameter_spec ext4_param_specs[] = {
1755 fsparam_flag ("bsddf", Opt_bsd_df),
1756 fsparam_flag ("minixdf", Opt_minix_df),
1757 fsparam_flag ("grpid", Opt_grpid),
1758 fsparam_flag ("bsdgroups", Opt_grpid),
1759 fsparam_flag ("nogrpid", Opt_nogrpid),
1760 fsparam_flag ("sysvgroups", Opt_nogrpid),
1761 fsparam_u32 ("resgid", Opt_resgid),
1762 fsparam_u32 ("resuid", Opt_resuid),
1763 fsparam_u32 ("sb", Opt_sb),
1764 fsparam_enum ("errors", Opt_errors, ext4_param_errors),
1765 fsparam_flag ("nouid32", Opt_nouid32),
1766 fsparam_flag ("debug", Opt_debug),
1767 fsparam_flag ("oldalloc", Opt_removed),
1768 fsparam_flag ("orlov", Opt_removed),
1769 fsparam_flag ("user_xattr", Opt_user_xattr),
1770 fsparam_flag ("nouser_xattr", Opt_nouser_xattr),
1771 fsparam_flag ("acl", Opt_acl),
1772 fsparam_flag ("noacl", Opt_noacl),
1773 fsparam_flag ("norecovery", Opt_noload),
1774 fsparam_flag ("noload", Opt_noload),
1775 fsparam_flag ("bh", Opt_removed),
1776 fsparam_flag ("nobh", Opt_removed),
1777 fsparam_u32 ("commit", Opt_commit),
1778 fsparam_u32 ("min_batch_time", Opt_min_batch_time),
1779 fsparam_u32 ("max_batch_time", Opt_max_batch_time),
1780 fsparam_u32 ("journal_dev", Opt_journal_dev),
1781 fsparam_bdev ("journal_path", Opt_journal_path),
1782 fsparam_flag ("journal_checksum", Opt_journal_checksum),
1783 fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum),
1784 fsparam_flag ("journal_async_commit",Opt_journal_async_commit),
1785 fsparam_flag ("abort", Opt_abort),
1786 fsparam_enum ("data", Opt_data, ext4_param_data),
1787 fsparam_enum ("data_err", Opt_data_err,
1788 ext4_param_data_err),
1789 fsparam_string_empty
1790 ("usrjquota", Opt_usrjquota),
1791 fsparam_string_empty
1792 ("grpjquota", Opt_grpjquota),
1793 fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt),
1794 fsparam_flag ("grpquota", Opt_grpquota),
1795 fsparam_flag ("quota", Opt_quota),
1796 fsparam_flag ("noquota", Opt_noquota),
1797 fsparam_flag ("usrquota", Opt_usrquota),
1798 fsparam_flag ("prjquota", Opt_prjquota),
1799 fsparam_flag ("barrier", Opt_barrier),
1800 fsparam_u32 ("barrier", Opt_barrier),
1801 fsparam_flag ("nobarrier", Opt_nobarrier),
1802 fsparam_flag ("i_version", Opt_i_version),
1803 fsparam_flag ("dax", Opt_dax),
1804 fsparam_enum ("dax", Opt_dax_type, ext4_param_dax),
1805 fsparam_u32 ("stripe", Opt_stripe),
1806 fsparam_flag ("delalloc", Opt_delalloc),
1807 fsparam_flag ("nodelalloc", Opt_nodelalloc),
1808 fsparam_flag ("warn_on_error", Opt_warn_on_error),
1809 fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error),
1810 fsparam_flag ("lazytime", Opt_lazytime),
1811 fsparam_flag ("nolazytime", Opt_nolazytime),
1812 fsparam_u32 ("debug_want_extra_isize",
1813 Opt_debug_want_extra_isize),
1814 fsparam_flag ("mblk_io_submit", Opt_removed),
1815 fsparam_flag ("nomblk_io_submit", Opt_removed),
1816 fsparam_flag ("block_validity", Opt_block_validity),
1817 fsparam_flag ("noblock_validity", Opt_noblock_validity),
1818 fsparam_u32 ("inode_readahead_blks",
1819 Opt_inode_readahead_blks),
1820 fsparam_u32 ("journal_ioprio", Opt_journal_ioprio),
1821 fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc),
1822 fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc),
1823 fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc),
1824 fsparam_flag ("dioread_nolock", Opt_dioread_nolock),
1825 fsparam_flag ("nodioread_nolock", Opt_dioread_lock),
1826 fsparam_flag ("dioread_lock", Opt_dioread_lock),
1827 fsparam_flag ("discard", Opt_discard),
1828 fsparam_flag ("nodiscard", Opt_nodiscard),
1829 fsparam_u32 ("init_itable", Opt_init_itable),
1830 fsparam_flag ("init_itable", Opt_init_itable),
1831 fsparam_flag ("noinit_itable", Opt_noinit_itable),
1832#ifdef CONFIG_EXT4_DEBUG
1833 fsparam_flag ("fc_debug_force", Opt_fc_debug_force),
1834 fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay),
1835#endif
1836 fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb),
1837 fsparam_flag ("test_dummy_encryption",
1838 Opt_test_dummy_encryption),
1839 fsparam_string ("test_dummy_encryption",
1840 Opt_test_dummy_encryption),
1841 fsparam_flag ("inlinecrypt", Opt_inlinecrypt),
1842 fsparam_flag ("nombcache", Opt_nombcache),
1843 fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */
1844 fsparam_flag ("prefetch_block_bitmaps",
1845 Opt_removed),
1846 fsparam_flag ("no_prefetch_block_bitmaps",
1847 Opt_no_prefetch_block_bitmaps),
1848 fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan),
1849 fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */
1850 fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */
1851 fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */
1852 fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */
1853 fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */
1854 {}
1855};
1856
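/*
 * For example, with the specs above a mount string piece "errors=remount-ro"
 * is resolved by fs_parse() against the "errors" entry: the value is looked
 * up in ext4_param_errors[] and comes back as Opt_errors with
 * result.uint_32 == Opt_err_ro, which handle_mount_opt() then treats as the
 * effective token when scanning ext4_mount_opts[].
 */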
a447c093 1857static const match_table_t tokens = {
ac27a0ec
DK
1858 {Opt_bsd_df, "bsddf"},
1859 {Opt_minix_df, "minixdf"},
1860 {Opt_grpid, "grpid"},
1861 {Opt_grpid, "bsdgroups"},
1862 {Opt_nogrpid, "nogrpid"},
1863 {Opt_nogrpid, "sysvgroups"},
1864 {Opt_resgid, "resgid=%u"},
1865 {Opt_resuid, "resuid=%u"},
1866 {Opt_sb, "sb=%u"},
1867 {Opt_err_cont, "errors=continue"},
1868 {Opt_err_panic, "errors=panic"},
1869 {Opt_err_ro, "errors=remount-ro"},
1870 {Opt_nouid32, "nouid32"},
ac27a0ec 1871 {Opt_debug, "debug"},
72578c33
TT
1872 {Opt_removed, "oldalloc"},
1873 {Opt_removed, "orlov"},
ac27a0ec
DK
1874 {Opt_user_xattr, "user_xattr"},
1875 {Opt_nouser_xattr, "nouser_xattr"},
1876 {Opt_acl, "acl"},
1877 {Opt_noacl, "noacl"},
e3bb52ae 1878 {Opt_noload, "norecovery"},
5a916be1 1879 {Opt_noload, "noload"},
72578c33
TT
1880 {Opt_removed, "nobh"},
1881 {Opt_removed, "bh"},
ac27a0ec 1882 {Opt_commit, "commit=%u"},
30773840
TT
1883 {Opt_min_batch_time, "min_batch_time=%u"},
1884 {Opt_max_batch_time, "max_batch_time=%u"},
ac27a0ec 1885 {Opt_journal_dev, "journal_dev=%u"},
ad4eec61 1886 {Opt_journal_path, "journal_path=%s"},
818d276c 1887 {Opt_journal_checksum, "journal_checksum"},
c6d3d56d 1888 {Opt_nojournal_checksum, "nojournal_checksum"},
818d276c 1889 {Opt_journal_async_commit, "journal_async_commit"},
ac27a0ec
DK
1890 {Opt_abort, "abort"},
1891 {Opt_data_journal, "data=journal"},
1892 {Opt_data_ordered, "data=ordered"},
1893 {Opt_data_writeback, "data=writeback"},
5bf5683a
HK
1894 {Opt_data_err_abort, "data_err=abort"},
1895 {Opt_data_err_ignore, "data_err=ignore"},
ac27a0ec
DK
1896 {Opt_offusrjquota, "usrjquota="},
1897 {Opt_usrjquota, "usrjquota=%s"},
1898 {Opt_offgrpjquota, "grpjquota="},
1899 {Opt_grpjquota, "grpjquota=%s"},
1900 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1901 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
5a20bdfc 1902 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
ac27a0ec
DK
1903 {Opt_grpquota, "grpquota"},
1904 {Opt_noquota, "noquota"},
1905 {Opt_quota, "quota"},
1906 {Opt_usrquota, "usrquota"},
49da9392 1907 {Opt_prjquota, "prjquota"},
ac27a0ec 1908 {Opt_barrier, "barrier=%u"},
06705bff
TT
1909 {Opt_barrier, "barrier"},
1910 {Opt_nobarrier, "nobarrier"},
25ec56b5 1911 {Opt_i_version, "i_version"},
923ae0ff 1912 {Opt_dax, "dax"},
9cb20f94
IW
1913 {Opt_dax_always, "dax=always"},
1914 {Opt_dax_inode, "dax=inode"},
1915 {Opt_dax_never, "dax=never"},
c9de560d 1916 {Opt_stripe, "stripe=%u"},
64769240 1917 {Opt_delalloc, "delalloc"},
327eaf73
TT
1918 {Opt_warn_on_error, "warn_on_error"},
1919 {Opt_nowarn_on_error, "nowarn_on_error"},
a26f4992
TT
1920 {Opt_lazytime, "lazytime"},
1921 {Opt_nolazytime, "nolazytime"},
670e9875 1922 {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
dd919b98 1923 {Opt_nodelalloc, "nodelalloc"},
36ade451
JK
1924 {Opt_removed, "mblk_io_submit"},
1925 {Opt_removed, "nomblk_io_submit"},
6fd058f7
TT
1926 {Opt_block_validity, "block_validity"},
1927 {Opt_noblock_validity, "noblock_validity"},
240799cd 1928 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
b3881f74 1929 {Opt_journal_ioprio, "journal_ioprio=%u"},
afd4672d 1930 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
06705bff
TT
1931 {Opt_auto_da_alloc, "auto_da_alloc"},
1932 {Opt_noauto_da_alloc, "noauto_da_alloc"},
744692dc 1933 {Opt_dioread_nolock, "dioread_nolock"},
244adf64 1934 {Opt_dioread_lock, "nodioread_nolock"},
744692dc 1935 {Opt_dioread_lock, "dioread_lock"},
5328e635
ES
1936 {Opt_discard, "discard"},
1937 {Opt_nodiscard, "nodiscard"},
fc6cb1cd
TT
1938 {Opt_init_itable, "init_itable=%u"},
1939 {Opt_init_itable, "init_itable"},
1940 {Opt_noinit_itable, "noinit_itable"},
8016e29f 1941#ifdef CONFIG_EXT4_DEBUG
99c880de 1942 {Opt_fc_debug_force, "fc_debug_force"},
8016e29f
HS
1943 {Opt_fc_debug_max_replay, "fc_debug_max_replay=%u"},
1944#endif
df981d03 1945 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
ed318a6c 1946 {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
6ddb2447 1947 {Opt_test_dummy_encryption, "test_dummy_encryption"},
4f74d15f 1948 {Opt_inlinecrypt, "inlinecrypt"},
cdb7ee4c
TE
1949 {Opt_nombcache, "nombcache"},
1950 {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
21175ca4
HS
1951 {Opt_removed, "prefetch_block_bitmaps"},
1952 {Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"},
196e402a 1953 {Opt_mb_optimize_scan, "mb_optimize_scan=%d"},
c7198b9c
TT
1954 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1955 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1956 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
1957 {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
1958 {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
f3f12faa 1959 {Opt_err, NULL},
ac27a0ec
DK
1960};
1961
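/*
 * Even though option parsing now goes through ext4_param_specs[] above,
 * this legacy match table is still used by token2str() to map a token back
 * to the option name printed by _ext4_show_options().
 */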
617ba13b 1962static ext4_fsblk_t get_sb_block(void **data)
ac27a0ec 1963{
617ba13b 1964 ext4_fsblk_t sb_block;
ac27a0ec
DK
1965 char *options = (char *) *data;
1966
1967 if (!options || strncmp(options, "sb=", 3) != 0)
1968 return 1; /* Default location */
0b8e58a1 1969
ac27a0ec 1970 options += 3;
0b8e58a1 1971 /* TODO: use simple_strtoll with >32bit ext4 */
ac27a0ec
DK
1972 sb_block = simple_strtoul(options, &options, 0);
1973 if (*options && *options != ',') {
4776004f 1974 printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
ac27a0ec
DK
1975 (char *) *data);
1976 return 1;
1977 }
1978 if (*options == ',')
1979 options++;
1980 *data = (void *) options;
0b8e58a1 1981
ac27a0ec
DK
1982 return sb_block;
1983}
1984
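/*
 * For example, mount data "sb=8193,ro" makes get_sb_block() return 8193 and
 * advance *data past "sb=8193," so that "ro" is left for normal option
 * parsing; anything that does not start with "sb=" just yields the default
 * superblock location of 1.
 */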
b3881f74 1985#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
196e402a
HS
1986#define DEFAULT_MB_OPTIMIZE_SCAN (-1)
1987
d6006186
EB
1988static const char deprecated_msg[] =
1989 "Mount option \"%s\" will be removed by %s\n"
437ca0fd 1990 "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
b3881f74 1991
26092bf5
TT
1992#define MOPT_SET 0x0001
1993#define MOPT_CLEAR 0x0002
1994#define MOPT_NOSUPPORT 0x0004
1995#define MOPT_EXPLICIT 0x0008
1996#define MOPT_CLEAR_ERR 0x0010
1997#define MOPT_GTE0 0x0020
ac27a0ec 1998#ifdef CONFIG_QUOTA
26092bf5
TT
1999#define MOPT_Q 0
2000#define MOPT_QFMT 0x0040
2001#else
2002#define MOPT_Q MOPT_NOSUPPORT
2003#define MOPT_QFMT MOPT_NOSUPPORT
ac27a0ec 2004#endif
26092bf5 2005#define MOPT_DATAJ 0x0080
8dc0aa8c
TT
2006#define MOPT_NO_EXT2 0x0100
2007#define MOPT_NO_EXT3 0x0200
2008#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
ad4eec61 2009#define MOPT_STRING 0x0400
9cb20f94 2010#define MOPT_SKIP 0x0800
995a3ed6 2011#define MOPT_2 0x1000
26092bf5
TT
2012
2013static const struct mount_opts {
2014 int token;
2015 int mount_opt;
2016 int flags;
2017} ext4_mount_opts[] = {
2018 {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
2019 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
2020 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
2021 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
26092bf5
TT
2022 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
2023 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
8dc0aa8c
TT
2024 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
2025 MOPT_EXT4_ONLY | MOPT_SET},
2026 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
2027 MOPT_EXT4_ONLY | MOPT_CLEAR},
26092bf5
TT
2028 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
2029 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
8dc0aa8c
TT
2030 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
2031 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
2032 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
59d9fa5c 2033 MOPT_EXT4_ONLY | MOPT_CLEAR},
327eaf73
TT
2034 {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
2035 {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
c6d3d56d
DW
2036 {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
2037 MOPT_EXT4_ONLY | MOPT_CLEAR},
8dc0aa8c 2038 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1e381f60 2039 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
26092bf5 2040 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
8dc0aa8c 2041 EXT4_MOUNT_JOURNAL_CHECKSUM),
1e381f60 2042 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
8dc0aa8c 2043 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
26092bf5
TT
2044 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
2045 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
2046 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
8dc0aa8c 2047 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
7915a861 2048 MOPT_NO_EXT2},
8dc0aa8c 2049 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
7915a861 2050 MOPT_NO_EXT2},
26092bf5
TT
2051 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
2052 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
2053 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
2054 {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
2055 {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
2056 {Opt_commit, 0, MOPT_GTE0},
2057 {Opt_max_batch_time, 0, MOPT_GTE0},
2058 {Opt_min_batch_time, 0, MOPT_GTE0},
2059 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
2060 {Opt_init_itable, 0, MOPT_GTE0},
9cb20f94
IW
2061 {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
2062 {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
2063 MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
2064 {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
2065 MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
2066 {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
2067 MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
26092bf5 2068 {Opt_stripe, 0, MOPT_GTE0},
0efb3b23
JK
2069 {Opt_resuid, 0, MOPT_GTE0},
2070 {Opt_resgid, 0, MOPT_GTE0},
5ba92bcf
CM
2071 {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
2072 {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
2073 {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
8dc0aa8c
TT
2074 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
2075 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
2076 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
2077 MOPT_NO_EXT2 | MOPT_DATAJ},
26092bf5
TT
2078 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
2079 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
03010a33 2080#ifdef CONFIG_EXT4_FS_POSIX_ACL
26092bf5
TT
2081 {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
2082 {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
ac27a0ec 2083#else
26092bf5
TT
2084 {Opt_acl, 0, MOPT_NOSUPPORT},
2085 {Opt_noacl, 0, MOPT_NOSUPPORT},
ac27a0ec 2086#endif
26092bf5
TT
2087 {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
2088 {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
670e9875 2089 {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
26092bf5
TT
2090 {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
2091 {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
2092 MOPT_SET | MOPT_Q},
2093 {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
2094 MOPT_SET | MOPT_Q},
49da9392
JK
2095 {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
2096 MOPT_SET | MOPT_Q},
26092bf5 2097 {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
49da9392
JK
2098 EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
2099 MOPT_CLEAR | MOPT_Q},
174fe5ba
KX
2100 {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
2101 {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
26092bf5
TT
2102 {Opt_offusrjquota, 0, MOPT_Q},
2103 {Opt_offgrpjquota, 0, MOPT_Q},
2104 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
2105 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
2106 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
df981d03 2107 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
ed318a6c 2108 {Opt_test_dummy_encryption, 0, MOPT_STRING},
cdb7ee4c 2109 {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
21175ca4 2110 {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
3d392b26 2111 MOPT_SET},
196e402a 2112 {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0},
99c880de 2113#ifdef CONFIG_EXT4_DEBUG
0f0672ff
HS
2114 {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
2115 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
8016e29f
HS
2116 {Opt_fc_debug_max_replay, 0, MOPT_GTE0},
2117#endif
26092bf5
TT
2118 {Opt_err, 0, 0}
2119};
2120
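/*
 * Reading the table above: a plain flag entry such as
 * {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR} makes the generic branch
 * at the end of handle_mount_opt() clear EXT4_MOUNT_BARRIER in the mount
 * context, while {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET} sets it; an
 * explicit "barrier=0" still clears the bit, since a zero value flips the
 * requested action.
 */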
c83ad55e
GKB
2121#ifdef CONFIG_UNICODE
2122static const struct ext4_sb_encodings {
2123 __u16 magic;
2124 char *name;
2125 char *version;
2126} ext4_sb_encoding_map[] = {
2127 {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"},
2128};
2129
2130static int ext4_sb_read_encoding(const struct ext4_super_block *es,
2131 const struct ext4_sb_encodings **encoding,
2132 __u16 *flags)
2133{
2134 __u16 magic = le16_to_cpu(es->s_encoding);
2135 int i;
2136
2137 for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
2138 if (magic == ext4_sb_encoding_map[i].magic)
2139 break;
2140
2141 if (i >= ARRAY_SIZE(ext4_sb_encoding_map))
2142 return -EINVAL;
2143
2144 *encoding = &ext4_sb_encoding_map[i];
2145 *flags = le16_to_cpu(es->s_encoding_flags);
2146
2147 return 0;
2148}
2149#endif
2150
6e47a3cc 2151static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
ed318a6c
EB
2152{
2153#ifdef CONFIG_FS_ENCRYPTION
2154 struct ext4_sb_info *sbi = EXT4_SB(sb);
2155 int err;
2156
6e47a3cc 2157 err = fscrypt_set_test_dummy_encryption(sb, arg,
ac4acb1f 2158 &sbi->s_dummy_enc_policy);
ed318a6c 2159 if (err) {
6e47a3cc
LC
2160 ext4_msg(sb, KERN_WARNING,
2161 "Error while setting test dummy encryption [%d]", err);
2162 return err;
ed318a6c
EB
2163 }
2164 ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
ed318a6c 2165#endif
6e47a3cc 2166 return 0;
ed318a6c
EB
2167}
2168
6e47a3cc
LC
2169#define EXT4_SPEC_JQUOTA (1 << 0)
2170#define EXT4_SPEC_JQFMT (1 << 1)
2171#define EXT4_SPEC_DATAJ (1 << 2)
2172#define EXT4_SPEC_SB_BLOCK (1 << 3)
2173#define EXT4_SPEC_JOURNAL_DEV (1 << 4)
2174#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
2175#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6)
2176#define EXT4_SPEC_s_want_extra_isize (1 << 7)
2177#define EXT4_SPEC_s_max_batch_time (1 << 8)
2178#define EXT4_SPEC_s_min_batch_time (1 << 9)
2179#define EXT4_SPEC_s_inode_readahead_blks (1 << 10)
2180#define EXT4_SPEC_s_li_wait_mult (1 << 11)
2181#define EXT4_SPEC_s_max_dir_size_kb (1 << 12)
2182#define EXT4_SPEC_s_stripe (1 << 13)
2183#define EXT4_SPEC_s_resuid (1 << 14)
2184#define EXT4_SPEC_s_resgid (1 << 15)
2185#define EXT4_SPEC_s_commit_interval (1 << 16)
2186#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17)
2187
461c3af0 2188struct ext4_fs_context {
e6e268cb 2189 char *s_qf_names[EXT4_MAXQUOTAS];
6e47a3cc 2190 char *test_dummy_enc_arg;
e6e268cb 2191 int s_jquota_fmt; /* Format of quota to use */
6e47a3cc
LC
2192 int mb_optimize_scan;
2193#ifdef CONFIG_EXT4_DEBUG
2194 int s_fc_debug_max_replay;
2195#endif
e6e268cb 2196 unsigned short qname_spec;
6e47a3cc
LC
2197 unsigned long vals_s_flags; /* Bits to set in s_flags */
2198 unsigned long mask_s_flags; /* Bits changed in s_flags */
e6e268cb 2199 unsigned long journal_devnum;
6e47a3cc
LC
2200 unsigned long s_commit_interval;
2201 unsigned long s_stripe;
2202 unsigned int s_inode_readahead_blks;
2203 unsigned int s_want_extra_isize;
2204 unsigned int s_li_wait_mult;
2205 unsigned int s_max_dir_size_kb;
e6e268cb 2206 unsigned int journal_ioprio;
6e47a3cc
LC
2207 unsigned int vals_s_mount_opt;
2208 unsigned int mask_s_mount_opt;
2209 unsigned int vals_s_mount_opt2;
2210 unsigned int mask_s_mount_opt2;
2211 unsigned int vals_s_mount_flags;
2212 unsigned int mask_s_mount_flags;
b6bd2435 2213 unsigned int opt_flags; /* MOPT flags */
6e47a3cc
LC
2214 unsigned int spec;
2215 u32 s_max_batch_time;
2216 u32 s_min_batch_time;
2217 kuid_t s_resuid;
2218 kgid_t s_resgid;
b237e304
HS
2219};
2220
e6e268cb
LC
2221#ifdef CONFIG_QUOTA
2222/*
2223 * Note the name of the specified quota file.
2224 */
2225static int note_qf_name(struct fs_context *fc, int qtype,
2226 struct fs_parameter *param)
2227{
2228 struct ext4_fs_context *ctx = fc->fs_private;
2229 char *qname;
2230
2231 if (param->size < 1) {
2232 ext4_msg(NULL, KERN_ERR, "Missing quota name");
2233 return -EINVAL;
2234 }
2235 if (strchr(param->string, '/')) {
2236 ext4_msg(NULL, KERN_ERR,
2237 "quotafile must be on filesystem root");
2238 return -EINVAL;
2239 }
2240 if (ctx->s_qf_names[qtype]) {
2241 if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2242 ext4_msg(NULL, KERN_ERR,
2243 "%s quota file already specified",
2244 QTYPE2NAME(qtype));
2245 return -EINVAL;
2246 }
2247 return 0;
2248 }
2249
2250 qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2251 if (!qname) {
2252 ext4_msg(NULL, KERN_ERR,
2253 "Not enough memory for storing quotafile name");
2254 return -ENOMEM;
2255 }
2256 ctx->s_qf_names[qtype] = qname;
2257 ctx->qname_spec |= 1 << qtype;
6e47a3cc 2258 ctx->spec |= EXT4_SPEC_JQUOTA;
e6e268cb
LC
2259 return 0;
2260}
2261
2262/*
2263 * Clear the name of the specified quota file.
2264 */
2265static int unnote_qf_name(struct fs_context *fc, int qtype)
2266{
2267 struct ext4_fs_context *ctx = fc->fs_private;
2268
2269 kfree(ctx->s_qf_names[qtype]);
2271
2272 ctx->s_qf_names[qtype] = NULL;
2273 ctx->qname_spec |= 1 << qtype;
6e47a3cc 2274 ctx->spec |= EXT4_SPEC_JQUOTA;
e6e268cb
LC
2275 return 0;
2276}
2277#endif
2278
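/*
 * For example, "usrjquota=aquota.user" ends up in note_qf_name(), which
 * copies the name into ctx->s_qf_names[USRQUOTA] and marks it in
 * qname_spec, while an empty "usrjquota=" goes through unnote_qf_name() to
 * request clearing it; ext4_apply_quota_options() later publishes the
 * names into sbi->s_qf_names[].
 */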
6e47a3cc
LC
2279#define EXT4_SET_CTX(name) \
2280static inline void ctx_set_##name(struct ext4_fs_context *ctx, int flag)\
2281{ \
2282 ctx->mask_s_##name |= flag; \
2283 ctx->vals_s_##name |= flag; \
2284} \
2285static inline void ctx_clear_##name(struct ext4_fs_context *ctx, int flag)\
2286{ \
2287 ctx->mask_s_##name |= flag; \
2288 ctx->vals_s_##name &= ~flag; \
2289} \
2290static inline bool ctx_test_##name(struct ext4_fs_context *ctx, int flag)\
2291{ \
2292 return ((ctx->vals_s_##name & flag) != 0); \
2293} \
2294
2295EXT4_SET_CTX(flags);
2296EXT4_SET_CTX(mount_opt);
2297EXT4_SET_CTX(mount_opt2);
2298EXT4_SET_CTX(mount_flags);
2299
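/*
 * For reference, EXT4_SET_CTX(mount_opt) expands to ctx_set_mount_opt(),
 * ctx_clear_mount_opt() and ctx_test_mount_opt(), all operating on the
 * vals_s_mount_opt/mask_s_mount_opt pair.  The mask records which bits were
 * explicitly touched, so ext4_apply_options() only rewrites those bits in
 * sbi->s_mount_opt and leaves everything else alone.
 */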
461c3af0 2300static int handle_mount_opt(struct fs_context *fc, struct fs_parameter *param)
26092bf5 2301{
461c3af0 2302 struct ext4_fs_context *ctx = fc->fs_private;
461c3af0 2303 struct fs_parse_result result;
26092bf5 2304 const struct mount_opts *m;
461c3af0 2305 int is_remount;
08cefc7a
EB
2306 kuid_t uid;
2307 kgid_t gid;
461c3af0
LC
2308 int token;
2309
2310 token = fs_parse(fc, ext4_param_specs, param, &result);
2311 if (token < 0)
2312 return token;
2313 is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
26092bf5 2314
57f73c2c 2315#ifdef CONFIG_QUOTA
461c3af0
LC
2316 if (token == Opt_usrjquota) {
2317 if (!*param->string)
e6e268cb 2318 return unnote_qf_name(fc, USRQUOTA);
461c3af0 2319 else
e6e268cb 2320 return note_qf_name(fc, USRQUOTA, param);
461c3af0
LC
2321 } else if (token == Opt_grpjquota) {
2322 if (!*param->string)
e6e268cb 2323 return unnote_qf_name(fc, GRPQUOTA);
461c3af0 2324 else
e6e268cb 2325 return note_qf_name(fc, GRPQUOTA, param);
461c3af0 2326 }
57f73c2c 2327#endif
26092bf5 2328 switch (token) {
f7048605
TT
2329 case Opt_noacl:
2330 case Opt_nouser_xattr:
da812f61 2331 ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5");
f7048605 2332 break;
26092bf5
TT
2333 case Opt_sb:
2334 return 1; /* handled by get_sb_block() */
2335 case Opt_removed:
da812f61 2336 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
461c3af0 2337 param->key);
26092bf5 2338 return 1;
26092bf5 2339 case Opt_abort:
6e47a3cc 2340 ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED);
26092bf5
TT
2341 return 1;
2342 case Opt_i_version:
6e47a3cc 2343 ctx_set_flags(ctx, SB_I_VERSION);
26092bf5 2344 return 1;
a26f4992 2345 case Opt_lazytime:
6e47a3cc 2346 ctx_set_flags(ctx, SB_LAZYTIME);
a26f4992
TT
2347 return 1;
2348 case Opt_nolazytime:
6e47a3cc 2349 ctx_clear_flags(ctx, SB_LAZYTIME);
a26f4992 2350 return 1;
4f74d15f
EB
2351 case Opt_inlinecrypt:
2352#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
6e47a3cc 2353 ctx_set_flags(ctx, SB_INLINECRYPT);
4f74d15f 2354#else
da812f61 2355 ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
4f74d15f
EB
2356#endif
2357 return 1;
461c3af0
LC
2358 case Opt_errors:
2359 case Opt_data:
2360 case Opt_data_err:
2361 case Opt_jqfmt:
2362 case Opt_dax_type:
2363 token = result.uint_32;
26092bf5
TT
2364 }
2365
5f3633e3
JK
2366 for (m = ext4_mount_opts; m->token != Opt_err; m++)
2367 if (token == m->token)
2368 break;
2369
b6bd2435
LC
2370 ctx->opt_flags |= m->flags;
2371
5f3633e3 2372 if (m->token == Opt_err) {
da812f61 2373 ext4_msg(NULL, KERN_ERR, "Unrecognized mount option \"%s\" "
461c3af0 2374 "or missing value", param->key);
da812f61 2375 return -EINVAL;
5f3633e3
JK
2376 }
2377
c93cf2d7
DM
2378 if (m->flags & MOPT_EXPLICIT) {
2379 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
6e47a3cc 2380 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
1e381f60 2381 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
6e47a3cc
LC
2382 ctx_set_mount_opt2(ctx,
2383 EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
c93cf2d7 2384 } else
da812f61 2385 return -EINVAL;
c93cf2d7 2386 }
5f3633e3 2387 if (m->flags & MOPT_CLEAR_ERR)
6e47a3cc 2388 ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
5f3633e3
JK
2389
2390 if (m->flags & MOPT_NOSUPPORT) {
da812f61 2391 ext4_msg(NULL, KERN_ERR, "%s option not supported",
461c3af0 2392 param->key);
5f3633e3 2393 } else if (token == Opt_commit) {
461c3af0 2394 if (result.uint_32 == 0)
6e47a3cc 2395 ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE;
461c3af0 2396 else if (result.uint_32 > INT_MAX / HZ) {
da812f61 2397 ext4_msg(NULL, KERN_ERR,
9ba55543 2398 "Invalid commit interval %d, "
2399 "must be smaller than %d",
461c3af0 2400 result.uint_32, INT_MAX / HZ);
da812f61 2401 return -EINVAL;
9ba55543 2402 }
6e47a3cc
LC
2403 ctx->s_commit_interval = HZ * result.uint_32;
2404 ctx->spec |= EXT4_SPEC_s_commit_interval;
670e9875 2405 } else if (token == Opt_debug_want_extra_isize) {
6e47a3cc 2406 if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
da812f61 2407 ext4_msg(NULL, KERN_ERR,
461c3af0 2408 "Invalid want_extra_isize %d", result.uint_32);
da812f61 2409 return -EINVAL;
9803387c 2410 }
6e47a3cc
LC
2411 ctx->s_want_extra_isize = result.uint_32;
2412 ctx->spec |= EXT4_SPEC_s_want_extra_isize;
5f3633e3 2413 } else if (token == Opt_max_batch_time) {
6e47a3cc
LC
2414 ctx->s_max_batch_time = result.uint_32;
2415 ctx->spec |= EXT4_SPEC_s_max_batch_time;
5f3633e3 2416 } else if (token == Opt_min_batch_time) {
6e47a3cc
LC
2417 ctx->s_min_batch_time = result.uint_32;
2418 ctx->spec |= EXT4_SPEC_s_min_batch_time;
5f3633e3 2419 } else if (token == Opt_inode_readahead_blks) {
461c3af0
LC
2420 if (result.uint_32 &&
2421 (result.uint_32 > (1 << 30) ||
2422 !is_power_of_2(result.uint_32))) {
da812f61 2423 ext4_msg(NULL, KERN_ERR,
e33e60ea
JK
2424 "EXT4-fs: inode_readahead_blks must be "
2425 "0 or a power of 2 smaller than 2^31");
da812f61 2426 return -EINVAL;
5f3633e3 2427 }
6e47a3cc
LC
2428 ctx->s_inode_readahead_blks = result.uint_32;
2429 ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
5f3633e3 2430 } else if (token == Opt_init_itable) {
6e47a3cc
LC
2431 ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2432 ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
461c3af0 2433 if (param->type == fs_value_is_string)
6e47a3cc
LC
2434 ctx->s_li_wait_mult = result.uint_32;
2435 ctx->spec |= EXT4_SPEC_s_li_wait_mult;
5f3633e3 2436 } else if (token == Opt_max_dir_size_kb) {
6e47a3cc
LC
2437 ctx->s_max_dir_size_kb = result.uint_32;
2438 ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
8016e29f
HS
2439#ifdef CONFIG_EXT4_DEBUG
2440 } else if (token == Opt_fc_debug_max_replay) {
6e47a3cc
LC
2441 ctx->s_fc_debug_max_replay = result.uint_32;
2442 ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
8016e29f 2443#endif
5f3633e3 2444 } else if (token == Opt_stripe) {
6e47a3cc
LC
2445 ctx->s_stripe = result.uint_32;
2446 ctx->spec |= EXT4_SPEC_s_stripe;
5f3633e3 2447 } else if (token == Opt_resuid) {
461c3af0 2448 uid = make_kuid(current_user_ns(), result.uint_32);
5f3633e3 2449 if (!uid_valid(uid)) {
da812f61 2450 ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
461c3af0 2451 result.uint_32);
da812f61 2452 return -EINVAL;
26092bf5 2453 }
6e47a3cc
LC
2454 ctx->s_resuid = uid;
2455 ctx->spec |= EXT4_SPEC_s_resuid;
5f3633e3 2456 } else if (token == Opt_resgid) {
461c3af0 2457 gid = make_kgid(current_user_ns(), result.uint_32);
5f3633e3 2458 if (!gid_valid(gid)) {
da812f61 2459 ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
461c3af0 2460 result.uint_32);
da812f61 2461 return -EINVAL;
5f3633e3 2462 }
6e47a3cc
LC
2463 ctx->s_resgid = gid;
2464 ctx->spec |= EXT4_SPEC_s_resgid;
5f3633e3
JK
2465 } else if (token == Opt_journal_dev) {
2466 if (is_remount) {
da812f61 2467 ext4_msg(NULL, KERN_ERR,
5f3633e3 2468 "Cannot specify journal on remount");
da812f61 2469 return -EINVAL;
5f3633e3 2470 }
461c3af0 2471 ctx->journal_devnum = result.uint_32;
6e47a3cc 2472 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
ad4eec61 2473 } else if (token == Opt_journal_path) {
ad4eec61
ES
2474 struct inode *journal_inode;
2475 struct path path;
2476 int error;
2477
2478 if (is_remount) {
da812f61 2479 ext4_msg(NULL, KERN_ERR,
ad4eec61 2480 "Cannot specify journal on remount");
da812f61 2481 return -EINVAL;
ad4eec61 2482 }
ad4eec61 2483
461c3af0 2484 error = fs_lookup_param(fc, param, 1, &path);
ad4eec61 2485 if (error) {
da812f61 2486 ext4_msg(NULL, KERN_ERR, "error: could not find "
461c3af0 2487 "journal device path");
da812f61 2488 return -EINVAL;
ad4eec61
ES
2489 }
2490
2b0143b5 2491 journal_inode = d_inode(path.dentry);
461c3af0 2492 ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
6e47a3cc 2493 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
ad4eec61 2494 path_put(&path);
5f3633e3 2495 } else if (token == Opt_journal_ioprio) {
461c3af0 2496 if (result.uint_32 > 7) {
da812f61 2497 ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
5f3633e3 2498 " (must be 0-7)");
da812f61 2499 return -EINVAL;
5f3633e3 2500 }
461c3af0
LC
2501 ctx->journal_ioprio =
2502 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
6e47a3cc 2503 ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
6ddb2447 2504 } else if (token == Opt_test_dummy_encryption) {
6e47a3cc
LC
2505#ifdef CONFIG_FS_ENCRYPTION
2506 if (param->type == fs_value_is_flag) {
2507 ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
2508 ctx->test_dummy_enc_arg = NULL;
2509 return 1;
ac27a0ec 2510 }
6e47a3cc
LC
2511 if (*param->string &&
2512     strcmp(param->string, "v1") &&
2513     strcmp(param->string, "v2")) {
2514 ext4_msg(NULL, KERN_WARNING,
2515 "Value of option \"%s\" is unrecognized",
2516 param->key);
2517 return -EINVAL;
2518 }
2519 ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
2520 ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size,
2521 GFP_KERNEL);
2522#else
2523 ext4_msg(NULL, KERN_WARNING,
2524 "Test dummy encryption mount option ignored");
2525#endif
2526 } else if (m->flags & MOPT_DATAJ) {
2527 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2528 ctx_set_mount_opt(ctx, m->mount_opt);
2529 ctx->spec |= EXT4_SPEC_DATAJ;
5f3633e3
JK
2530#ifdef CONFIG_QUOTA
2531 } else if (m->flags & MOPT_QFMT) {
e6e268cb 2532 ctx->s_jquota_fmt = m->mount_opt;
6e47a3cc 2533 ctx->spec |= EXT4_SPEC_JQFMT;
923ae0ff 2534#endif
9cb20f94
IW
2535 } else if (token == Opt_dax || token == Opt_dax_always ||
2536 token == Opt_dax_inode || token == Opt_dax_never) {
ef83b6e8 2537#ifdef CONFIG_FS_DAX
9cb20f94
IW
2538 switch (token) {
2539 case Opt_dax:
2540 case Opt_dax_always:
6e47a3cc
LC
2541 ctx_set_mount_opt(ctx, m->mount_opt);
2542 ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
9cb20f94
IW
2543 break;
2544 case Opt_dax_never:
6e47a3cc
LC
2545 ctx_set_mount_opt2(ctx, m->mount_opt);
2546 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
9cb20f94
IW
2547 break;
2548 case Opt_dax_inode:
6e47a3cc
LC
2549 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2550 ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
9cb20f94 2551 /* Strictly for printing options */
6e47a3cc 2552 ctx_set_mount_opt2(ctx, m->mount_opt);
9cb20f94
IW
2553 break;
2554 }
ef83b6e8 2555#else
da812f61 2556 ext4_msg(NULL, KERN_INFO, "dax option not supported");
6e47a3cc
LC
2557 ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2558 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
da812f61 2559 return -EINVAL;
5f3633e3 2560#endif
7915a861 2561 } else if (token == Opt_data_err_abort) {
6e47a3cc 2562 ctx_set_mount_opt(ctx, m->mount_opt);
7915a861 2563 } else if (token == Opt_data_err_ignore) {
6e47a3cc 2564 ctx_clear_mount_opt(ctx, m->mount_opt);
196e402a 2565 } else if (token == Opt_mb_optimize_scan) {
461c3af0 2566 if (result.int_32 != 0 && result.int_32 != 1) {
da812f61 2567 ext4_msg(NULL, KERN_WARNING,
196e402a 2568 "mb_optimize_scan should be set to 0 or 1.");
da812f61 2569 return -EINVAL;
196e402a 2570 }
461c3af0 2571 ctx->mb_optimize_scan = result.int_32;
5f3633e3 2572 } else {
461c3af0
LC
2573 unsigned int set = 0;
2574
2575 if ((param->type == fs_value_is_flag) ||
2576 result.uint_32 > 0)
2577 set = 1;
2578
5f3633e3 2579 if (m->flags & MOPT_CLEAR)
461c3af0 2580 set = !set;
5f3633e3 2581 else if (unlikely(!(m->flags & MOPT_SET))) {
da812f61 2582 ext4_msg(NULL, KERN_WARNING,
461c3af0
LC
2583 "buggy handling of option %s",
2584 param->key);
5f3633e3 2585 WARN_ON(1);
da812f61 2586 return -EINVAL;
5f3633e3 2587 }
995a3ed6 2588 if (m->flags & MOPT_2) {
461c3af0 2589 if (set != 0)
6e47a3cc 2590 ctx_set_mount_opt2(ctx, m->mount_opt);
995a3ed6 2591 else
6e47a3cc 2592 ctx_clear_mount_opt2(ctx, m->mount_opt);
995a3ed6 2593 } else {
461c3af0 2594 if (set != 0)
6e47a3cc 2595 ctx_set_mount_opt(ctx, m->mount_opt);
995a3ed6 2596 else
6e47a3cc 2597 ctx_clear_mount_opt(ctx, m->mount_opt);
995a3ed6 2598 }
26092bf5 2599 }
5f3633e3 2600 return 1;
26092bf5
TT
2601}
2602
2603static int parse_options(char *options, struct super_block *sb,
e6e268cb 2604 struct ext4_fs_context *ctx,
26092bf5
TT
2605 int is_remount)
2606{
461c3af0
LC
2607 struct fs_parameter param;
2608 struct fs_context fc;
2609 int ret;
2610 char *key;
26092bf5
TT
2611
2612 if (!options)
2613 return 1;
2614
461c3af0 2615 memset(&fc, 0, sizeof(fc));
e6e268cb 2616 fc.fs_private = ctx;
461c3af0
LC
2617 fc.s_fs_info = EXT4_SB(sb);
2618
2619 if (is_remount)
2620 fc.purpose = FS_CONTEXT_FOR_RECONFIGURE;
2621
2622 while ((key = strsep(&options, ",")) != NULL) {
2623 if (*key) {
2624 size_t v_len = 0;
2625 char *value = strchr(key, '=');
2626
2627 param.type = fs_value_is_flag;
2628 param.string = NULL;
2629
2630 if (value) {
2631 if (value == key)
2632 continue;
2633
2634 *value++ = 0;
2635 v_len = strlen(value);
2636 param.string = kmemdup_nul(value, v_len,
2637 GFP_KERNEL);
2638 if (!param.string)
2639 return 0;
2640 param.type = fs_value_is_string;
2641 }
2642
2643 param.key = key;
2644 param.size = v_len;
2645
2646 ret = handle_mount_opt(&fc, &param);
2647 if (param.string)
2648 kfree(param.string);
2649 if (ret < 0)
2650 return 0;
2651 }
ac27a0ec 2652 }
461c3af0 2653
da812f61
LC
2654 ret = ext4_validate_options(&fc);
2655 if (ret < 0)
2656 return 0;
2657
b6bd2435 2658 ret = ext4_check_opt_consistency(&fc, sb);
e6e268cb
LC
2659 if (ret < 0)
2660 return 0;
2661
6e47a3cc
LC
2662 ret = ext4_apply_options(&fc, sb);
2663 if (ret < 0)
2664 return 0;
e6e268cb 2665
da812f61 2666 return 1;
4c94bff9
LC
2667}
2668
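/*
 * Example: for a legacy option string "data=ordered,nobarrier,stripe=32",
 * parse_options() builds a throwaway fs_context, feeds each comma-separated
 * piece to handle_mount_opt() as a struct fs_parameter ("data" = "ordered",
 * "nobarrier" as a flag, "stripe" = "32") and then runs the usual
 * validate / check consistency / apply sequence against the superblock.
 */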
e6e268cb
LC
2669static void ext4_apply_quota_options(struct fs_context *fc,
2670 struct super_block *sb)
2671{
2672#ifdef CONFIG_QUOTA
6e47a3cc 2673 bool quota_feature = ext4_has_feature_quota(sb);
e6e268cb
LC
2674 struct ext4_fs_context *ctx = fc->fs_private;
2675 struct ext4_sb_info *sbi = EXT4_SB(sb);
2676 char *qname;
2677 int i;
2678
6e47a3cc
LC
2679 if (quota_feature)
2680 return;
2681
2682 if (ctx->spec & EXT4_SPEC_JQUOTA) {
2683 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2684 if (!(ctx->qname_spec & (1 << i)))
2685 continue;
2686
2687 qname = ctx->s_qf_names[i]; /* May be NULL */
2688 ctx->s_qf_names[i] = NULL;
2689 kfree(sbi->s_qf_names[i]);
2690 rcu_assign_pointer(sbi->s_qf_names[i], qname);
2691 set_opt(sb, QUOTA);
2692 }
e6e268cb 2693 }
6e47a3cc
LC
2694
2695 if (ctx->spec & EXT4_SPEC_JQFMT)
2696 sbi->s_jquota_fmt = ctx->s_jquota_fmt;
e6e268cb
LC
2697#endif
2698}
2699
2700/*
2701 * Check quota settings consistency.
2702 */
2703static int ext4_check_quota_consistency(struct fs_context *fc,
2704 struct super_block *sb)
2705{
2706#ifdef CONFIG_QUOTA
2707 struct ext4_fs_context *ctx = fc->fs_private;
2708 struct ext4_sb_info *sbi = EXT4_SB(sb);
2709 bool quota_feature = ext4_has_feature_quota(sb);
2710 bool quota_loaded = sb_any_quota_loaded(sb);
6e47a3cc
LC
2711 bool usr_qf_name, grp_qf_name, usrquota, grpquota;
2712 int quota_flags, i;
2713
2714 /*
2715 * We do the test below only for project quotas. 'usrquota' and
2716 * 'grpquota' mount options are allowed even without quota feature
2717 * to support legacy quotas in quota files.
2718 */
2719 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
2720 !ext4_has_feature_project(sb)) {
2721 ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
2722 "Cannot enable project quota enforcement.");
2723 return -EINVAL;
2724 }
e6e268cb 2725
6e47a3cc
LC
2726 quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
2727 EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
2728 if (quota_loaded &&
2729 ctx->mask_s_mount_opt & quota_flags &&
2730 !ctx_test_mount_opt(ctx, quota_flags))
2731 goto err_quota_change;
2732
2733 if (ctx->spec & EXT4_SPEC_JQUOTA) {
e6e268cb
LC
2734
2735 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2736 if (!(ctx->qname_spec & (1 << i)))
2737 continue;
2738
6e47a3cc
LC
2739 if (quota_loaded &&
2740 !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
e6e268cb
LC
2741 goto err_jquota_change;
2742
2743 if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
2744 strcmp(sbi->s_qf_names[i],
2745 ctx->s_qf_names[i]) != 0)
2746 goto err_jquota_specified;
2747 }
6e47a3cc
LC
2748
2749 if (quota_feature) {
2750 ext4_msg(NULL, KERN_INFO,
2751 "Journaled quota options ignored when "
2752 "QUOTA feature is enabled");
2753 return 0;
2754 }
e6e268cb
LC
2755 }
2756
6e47a3cc 2757 if (ctx->spec & EXT4_SPEC_JQFMT) {
e6e268cb 2758 if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
6e47a3cc 2759 goto err_jquota_change;
e6e268cb
LC
2760 if (quota_feature) {
2761 ext4_msg(NULL, KERN_INFO, "Quota format mount options "
2762 "ignored when QUOTA feature is enabled");
2763 return 0;
2764 }
2765 }
6e47a3cc
LC
2766
2767 /* Make sure we don't mix old and new quota format */
2768 usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
2769 ctx->s_qf_names[USRQUOTA]);
2770 grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
2771 ctx->s_qf_names[GRPQUOTA]);
2772
2773 usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2774 test_opt(sb, USRQUOTA));
2775
2776 grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
2777 test_opt(sb, GRPQUOTA));
2778
2779 if (usr_qf_name) {
2780 ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2781 usrquota = false;
2782 }
2783 if (grp_qf_name) {
2784 ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2785 grpquota = false;
2786 }
2787
2788 if (usr_qf_name || grp_qf_name) {
2789 if (usrquota || grpquota) {
2790 ext4_msg(NULL, KERN_ERR, "old and new quota "
2791 "format mixing");
2792 return -EINVAL;
2793 }
2794
2795 if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
2796 ext4_msg(NULL, KERN_ERR, "journaled quota format "
2797 "not specified");
2798 return -EINVAL;
2799 }
2800 }
2801
e6e268cb
LC
2802 return 0;
2803
2804err_quota_change:
2805 ext4_msg(NULL, KERN_ERR,
2806 "Cannot change quota options when quota turned on");
2807 return -EINVAL;
2808err_jquota_change:
2809 ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
2810 "options when quota turned on");
2811 return -EINVAL;
2812err_jquota_specified:
2813 ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
2814 QTYPE2NAME(i));
2815 return -EINVAL;
e6e268cb
LC
2816#else
2817 return 0;
2818#endif
2819}
2820
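/*
 * For example, on a filesystem without the quota feature, remounting with
 * "usrjquota=aquota.user,grpquota" trips the "old and new quota format
 * mixing" check above, and a journaled quota name without jqfmt= (and with
 * no format already recorded in sbi->s_jquota_fmt) fails with "journaled
 * quota format not specified".
 */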
b6bd2435
LC
2821static int ext4_check_opt_consistency(struct fs_context *fc,
2822 struct super_block *sb)
2823{
2824 struct ext4_fs_context *ctx = fc->fs_private;
6e47a3cc
LC
2825 struct ext4_sb_info *sbi = fc->s_fs_info;
2826 int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
b6bd2435
LC
2827
2828 if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2829 ext4_msg(NULL, KERN_ERR,
2830 "Mount option(s) incompatible with ext2");
2831 return -EINVAL;
2832 }
2833 if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2834 ext4_msg(NULL, KERN_ERR,
2835 "Mount option(s) incompatible with ext3");
2836 return -EINVAL;
2837 }
2838
6e47a3cc
LC
2839 if (ctx->s_want_extra_isize >
2840 (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2841 ext4_msg(NULL, KERN_ERR,
2842 "Invalid want_extra_isize %d",
2843 ctx->s_want_extra_isize);
2844 return -EINVAL;
2845 }
2846
2847 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2848 int blocksize =
2849 BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2850 if (blocksize < PAGE_SIZE)
2851 ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2852 "experimental mount option 'dioread_nolock' "
2853 "for blocksize < PAGE_SIZE");
2854 }
2855
2856#ifdef CONFIG_FS_ENCRYPTION
2857 /*
2858 * This mount option is just for testing, and it's not worthwhile to
2859 * implement the extra complexity (e.g. RCU protection) that would be
2860 * needed to allow it to be set or changed during remount. We do allow
2861 * it to be specified during remount, but only if there is no change.
2862 */
2863 if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) &&
2864 is_remount && !sbi->s_dummy_enc_policy.policy) {
2865 ext4_msg(NULL, KERN_WARNING,
2866 "Can't set test_dummy_encryption on remount");
2867 return -EINVAL;
2868 }
2869#endif
2870
2871 if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2872 if (!sbi->s_journal) {
2873 ext4_msg(NULL, KERN_WARNING,
2874 "Remounting file system with no journal "
2875 "so ignoring journalled data option");
2876 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2877 } else if (ctx->mask_s_mount_opt & EXT4_MOUNT_DATA_FLAGS) {
2878 ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2879 "on remount");
2880 return -EINVAL;
2881 }
2882 }
2883
2884 if (is_remount) {
2885 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2886 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2887 ext4_msg(NULL, KERN_ERR, "can't mount with "
2888 "both data=journal and dax");
2889 return -EINVAL;
2890 }
2891
2892 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2893 (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2894 (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2895fail_dax_change_remount:
2896 ext4_msg(NULL, KERN_ERR, "can't change "
2897 "dax mount option while remounting");
2898 return -EINVAL;
2899 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2900 (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2901 (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2902 goto fail_dax_change_remount;
2903 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2904 ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2905 (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2906 !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2907 goto fail_dax_change_remount;
2908 }
2909 }
2910
b6bd2435
LC
2911 return ext4_check_quota_consistency(fc, sb);
2912}
2913
6e47a3cc 2914static int ext4_apply_options(struct fs_context *fc, struct super_block *sb)
4c94bff9 2915{
6e47a3cc 2916 struct ext4_fs_context *ctx = fc->fs_private;
da812f61 2917 struct ext4_sb_info *sbi = fc->s_fs_info;
6e47a3cc
LC
2918 int ret = 0;
2919
2920 sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2921 sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2922 sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2923 sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2924 sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
2925 sbi->s_mount_flags |= ctx->vals_s_mount_flags;
2926 sb->s_flags &= ~ctx->mask_s_flags;
2927 sb->s_flags |= ctx->vals_s_flags;
2928
2929#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2930 APPLY(s_commit_interval);
2931 APPLY(s_stripe);
2932 APPLY(s_max_batch_time);
2933 APPLY(s_min_batch_time);
2934 APPLY(s_want_extra_isize);
2935 APPLY(s_inode_readahead_blks);
2936 APPLY(s_max_dir_size_kb);
2937 APPLY(s_li_wait_mult);
2938 APPLY(s_resgid);
2939 APPLY(s_resuid);
2940
2941#ifdef CONFIG_EXT4_DEBUG
2942 APPLY(s_fc_debug_max_replay);
2943#endif
2944
2945 ext4_apply_quota_options(fc, sb);
2946
2947 if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)
2948 ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg);
2949
2950 return ret;
2951}
2952
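/*
 * The APPLY() macro above copies a value only if the corresponding
 * EXT4_SPEC_* bit says it was given on the command line; APPLY(s_stripe),
 * for instance, expands (modulo the statement expression wrapper) to:
 *
 *	if (ctx->spec & EXT4_SPEC_s_stripe)
 *		sbi->s_stripe = ctx->s_stripe;
 */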
2953
2954static int ext4_validate_options(struct fs_context *fc)
2955{
ac27a0ec 2956#ifdef CONFIG_QUOTA
6e47a3cc 2957 struct ext4_fs_context *ctx = fc->fs_private;
4c94bff9 2958 char *usr_qf_name, *grp_qf_name;
6e47a3cc
LC
2959
2960 usr_qf_name = ctx->s_qf_names[USRQUOTA];
2961 grp_qf_name = ctx->s_qf_names[GRPQUOTA];
2962
33458eab 2963 if (usr_qf_name || grp_qf_name) {
6e47a3cc
LC
2964 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
2965 ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
ac27a0ec 2966
6e47a3cc
LC
2967 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
2968 ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
ac27a0ec 2969
6e47a3cc
LC
2970 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2971 ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
da812f61 2972 ext4_msg(NULL, KERN_ERR, "old and new quota "
6e47a3cc 2973 "format mixing");
da812f61 2974 return -EINVAL;
ac27a0ec 2975 }
ac27a0ec
DK
2976 }
2977#endif
6e47a3cc 2978 return 1;
ac27a0ec
DK
2979}
2980
2adf6da8
TT
2981static inline void ext4_show_quota_options(struct seq_file *seq,
2982 struct super_block *sb)
2983{
2984#if defined(CONFIG_QUOTA)
2985 struct ext4_sb_info *sbi = EXT4_SB(sb);
33458eab 2986 char *usr_qf_name, *grp_qf_name;
2adf6da8
TT
2987
2988 if (sbi->s_jquota_fmt) {
2989 char *fmtname = "";
2990
2991 switch (sbi->s_jquota_fmt) {
2992 case QFMT_VFS_OLD:
2993 fmtname = "vfsold";
2994 break;
2995 case QFMT_VFS_V0:
2996 fmtname = "vfsv0";
2997 break;
2998 case QFMT_VFS_V1:
2999 fmtname = "vfsv1";
3000 break;
3001 }
3002 seq_printf(seq, ",jqfmt=%s", fmtname);
3003 }
3004
33458eab
TT
3005 rcu_read_lock();
3006 usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
3007 grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
3008 if (usr_qf_name)
3009 seq_show_option(seq, "usrjquota", usr_qf_name);
3010 if (grp_qf_name)
3011 seq_show_option(seq, "grpjquota", grp_qf_name);
3012 rcu_read_unlock();
2adf6da8
TT
3013#endif
3014}
3015
5a916be1
TT
3016static const char *token2str(int token)
3017{
50df9fd5 3018 const struct match_token *t;
5a916be1
TT
3019
3020 for (t = tokens; t->token != Opt_err; t++)
3021 if (t->token == token && !strchr(t->pattern, '='))
3022 break;
3023 return t->pattern;
3024}
3025
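/*
 * e.g. token2str(Opt_barrier) walks tokens[], skips "barrier=%u" because
 * the pattern contains '=' and returns the plain "barrier" name, which is
 * what _ext4_show_options() wants to print.
 */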
2adf6da8
TT
3026/*
3027 * Show an option if
3028 * - it's set to a non-default value OR
3029 * - if the per-sb default is different from the global default
3030 */
66acdcf4
TT
3031static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
3032 int nodefs)
2adf6da8 3033{
2adf6da8
TT
3034 struct ext4_sb_info *sbi = EXT4_SB(sb);
3035 struct ext4_super_block *es = sbi->s_es;
68afa7e0 3036 int def_errors, def_mount_opt = sbi->s_def_mount_opt;
5a916be1 3037 const struct mount_opts *m;
66acdcf4 3038 char sep = nodefs ? '\n' : ',';
2adf6da8 3039
66acdcf4
TT
3040#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
3041#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2adf6da8
TT
3042
3043 if (sbi->s_sb_block != 1)
5a916be1
TT
3044 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
3045
3046 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
3047 int want_set = m->flags & MOPT_SET;
3048 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
9cb20f94 3049 (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
5a916be1 3050 continue;
68afa7e0 3051 if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
5a916be1
TT
3052 continue; /* skip if same as the default */
3053 if ((want_set &&
3054 (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
3055 (!want_set && (sbi->s_mount_opt & m->mount_opt)))
3056 continue; /* select Opt_noFoo vs Opt_Foo */
3057 SEQ_OPTS_PRINT("%s", token2str(m->token));
2adf6da8 3058 }
5a916be1 3059
08cefc7a 3060 if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
5a916be1 3061 le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
08cefc7a
EB
3062 SEQ_OPTS_PRINT("resuid=%u",
3063 from_kuid_munged(&init_user_ns, sbi->s_resuid));
3064 if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
5a916be1 3065 le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
08cefc7a
EB
3066 SEQ_OPTS_PRINT("resgid=%u",
3067 from_kgid_munged(&init_user_ns, sbi->s_resgid));
66acdcf4 3068 def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
5a916be1
TT
3069 if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
3070 SEQ_OPTS_PUTS("errors=remount-ro");
2adf6da8 3071 if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
5a916be1 3072 SEQ_OPTS_PUTS("errors=continue");
2adf6da8 3073 if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
5a916be1 3074 SEQ_OPTS_PUTS("errors=panic");
66acdcf4 3075 if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
5a916be1 3076 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
66acdcf4 3077 if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
5a916be1 3078 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
66acdcf4 3079 if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
5a916be1 3080 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
357fdad0 3081 if (sb->s_flags & SB_I_VERSION)
5a916be1 3082 SEQ_OPTS_PUTS("i_version");
66acdcf4 3083 if (nodefs || sbi->s_stripe)
5a916be1 3084 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
68afa7e0
TN
3085 if (nodefs || EXT4_MOUNT_DATA_FLAGS &
3086 (sbi->s_mount_opt ^ def_mount_opt)) {
5a916be1
TT
3087 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
3088 SEQ_OPTS_PUTS("data=journal");
3089 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
3090 SEQ_OPTS_PUTS("data=ordered");
3091 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
3092 SEQ_OPTS_PUTS("data=writeback");
3093 }
66acdcf4
TT
3094 if (nodefs ||
3095 sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
5a916be1
TT
3096 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
3097 sbi->s_inode_readahead_blks);
2adf6da8 3098
ceec0376 3099 if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
66acdcf4 3100 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
5a916be1 3101 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
df981d03
TT
3102 if (nodefs || sbi->s_max_dir_size_kb)
3103 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
7915a861
AN
3104 if (test_opt(sb, DATA_ERR_ABORT))
3105 SEQ_OPTS_PUTS("data_err=abort");
ed318a6c
EB
3106
3107 fscrypt_show_test_dummy_encryption(seq, sep, sb);
2adf6da8 3108
4f74d15f
EB
3109 if (sb->s_flags & SB_INLINECRYPT)
3110 SEQ_OPTS_PUTS("inlinecrypt");
3111
9cb20f94
IW
3112 if (test_opt(sb, DAX_ALWAYS)) {
3113 if (IS_EXT2_SB(sb))
3114 SEQ_OPTS_PUTS("dax");
3115 else
3116 SEQ_OPTS_PUTS("dax=always");
3117 } else if (test_opt2(sb, DAX_NEVER)) {
3118 SEQ_OPTS_PUTS("dax=never");
3119 } else if (test_opt2(sb, DAX_INODE)) {
3120 SEQ_OPTS_PUTS("dax=inode");
3121 }
2adf6da8 3122 ext4_show_quota_options(seq, sb);
2adf6da8
TT
3123 return 0;
3124}
3125
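/*
 * The two wrappers below differ only in the nodefs argument:
 * ext4_show_options() passes 0, so options matching the defaults are
 * skipped and entries are comma separated, while ext4_seq_options_show()
 * passes 1 to dump every option on its own line.
 */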
66acdcf4
TT
3126static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3127{
3128 return _ext4_show_options(seq, root->d_sb, 0);
3129}
3130
ebd173be 3131int ext4_seq_options_show(struct seq_file *seq, void *offset)
66acdcf4
TT
3132{
3133 struct super_block *sb = seq->private;
3134 int rc;
3135
bc98a42c 3136 seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
66acdcf4
TT
3137 rc = _ext4_show_options(seq, sb, 1);
3138 seq_puts(seq, "\n");
3139 return rc;
3140}
3141
617ba13b 3142static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ac27a0ec
DK
3143 int read_only)
3144{
617ba13b 3145 struct ext4_sb_info *sbi = EXT4_SB(sb);
c89128a0 3146 int err = 0;
ac27a0ec 3147
617ba13b 3148 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
b31e1552
ES
3149 ext4_msg(sb, KERN_ERR, "revision level too high, "
3150 "forcing read-only mode");
c89128a0 3151 err = -EROFS;
5adaccac 3152 goto done;
ac27a0ec
DK
3153 }
3154 if (read_only)
281b5995 3155 goto done;
617ba13b 3156 if (!(sbi->s_mount_state & EXT4_VALID_FS))
b31e1552
ES
3157 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3158 "running e2fsck is recommended");
c8b459f4 3159 else if (sbi->s_mount_state & EXT4_ERROR_FS)
b31e1552
ES
3160 ext4_msg(sb, KERN_WARNING,
3161 "warning: mounting fs with errors, "
3162 "running e2fsck is recommended");
ed3ce80a 3163 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
ac27a0ec
DK
3164 le16_to_cpu(es->s_mnt_count) >=
3165 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
b31e1552
ES
3166 ext4_msg(sb, KERN_WARNING,
3167 "warning: maximal mount count reached, "
3168 "running e2fsck is recommended");
ac27a0ec 3169 else if (le32_to_cpu(es->s_checkinterval) &&
6a0678a7
AB
3170 (ext4_get_tstamp(es, s_lastcheck) +
3171 le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
b31e1552
ES
3172 ext4_msg(sb, KERN_WARNING,
3173 "warning: checktime reached, "
3174 "running e2fsck is recommended");
0b8e58a1 3175 if (!sbi->s_journal)
0390131b 3176 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
ac27a0ec 3177 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
617ba13b 3178 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
e8546d06 3179 le16_add_cpu(&es->s_mnt_count, 1);
6a0678a7 3180 ext4_update_tstamp(es, s_mtime);
02f310fc 3181 if (sbi->s_journal) {
e2b911c5 3182 ext4_set_feature_journal_needs_recovery(sb);
02f310fc
JK
3183 if (ext4_has_feature_orphan_file(sb))
3184 ext4_set_feature_orphan_present(sb);
3185 }
ac27a0ec 3186
4392fbc4 3187 err = ext4_commit_super(sb);
281b5995 3188done:
ac27a0ec 3189 if (test_opt(sb, DEBUG))
a9df9a49 3190 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
a2595b8a 3191 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
ac27a0ec
DK
3192 sb->s_blocksize,
3193 sbi->s_groups_count,
617ba13b
MC
3194 EXT4_BLOCKS_PER_GROUP(sb),
3195 EXT4_INODES_PER_GROUP(sb),
a2595b8a 3196 sbi->s_mount_opt, sbi->s_mount_opt2);
ac27a0ec 3197
7abc52c2 3198 cleancache_init_fs(sb);
c89128a0 3199 return err;
ac27a0ec
DK
3200}
3201
117fff10
TT
3202int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3203{
3204 struct ext4_sb_info *sbi = EXT4_SB(sb);
7c990728 3205 struct flex_groups **old_groups, **new_groups;
37b0b6b8 3206 int size, i, j;
117fff10
TT
3207
3208 if (!sbi->s_log_groups_per_flex)
3209 return 0;
3210
3211 size = ext4_flex_group(sbi, ngroup - 1) + 1;
3212 if (size <= sbi->s_flex_groups_allocated)
3213 return 0;
3214
7c990728
SJS
3215 new_groups = kvzalloc(roundup_pow_of_two(size *
3216 sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
117fff10 3217 if (!new_groups) {
7c990728
SJS
3218 ext4_msg(sb, KERN_ERR,
3219 "not enough memory for %d flex group pointers", size);
117fff10
TT
3220 return -ENOMEM;
3221 }
7c990728
SJS
3222 for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3223 new_groups[i] = kvzalloc(roundup_pow_of_two(
3224 sizeof(struct flex_groups)),
3225 GFP_KERNEL);
3226 if (!new_groups[i]) {
37b0b6b8
DC
3227 for (j = sbi->s_flex_groups_allocated; j < i; j++)
3228 kvfree(new_groups[j]);
7c990728
SJS
3229 kvfree(new_groups);
3230 ext4_msg(sb, KERN_ERR,
3231 "not enough memory for %d flex groups", size);
3232 return -ENOMEM;
3233 }
117fff10 3234 }
7c990728
SJS
3235 rcu_read_lock();
3236 old_groups = rcu_dereference(sbi->s_flex_groups);
3237 if (old_groups)
3238 memcpy(new_groups, old_groups,
3239 (sbi->s_flex_groups_allocated *
3240 sizeof(struct flex_groups *)));
3241 rcu_read_unlock();
3242 rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3243 sbi->s_flex_groups_allocated = size;
3244 if (old_groups)
3245 ext4_kvfree_array_rcu(old_groups);
117fff10
TT
3246 return 0;
3247}
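/*
 * Illustrative sizing example, assuming s_log_groups_per_flex == 4
 * (i.e. 16 block groups per flex group): for ngroup == 100 the last
 * group is 99, ext4_flex_group() yields 99 >> 4 == 6, so size == 7
 * flex_groups pointers are needed. The pointer array is grown and
 * published via rcu_assign_pointer() so that readers going through
 * sbi_array_rcu_deref() never observe a half-initialized array.
 */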
3248
772cb7c8
JS
3249static int ext4_fill_flex_info(struct super_block *sb)
3250{
3251 struct ext4_sb_info *sbi = EXT4_SB(sb);
3252 struct ext4_group_desc *gdp = NULL;
7c990728 3253 struct flex_groups *fg;
772cb7c8 3254 ext4_group_t flex_group;
117fff10 3255 int i, err;
772cb7c8 3256
503358ae 3257 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
d50f2ab6 3258 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
772cb7c8
JS
3259 sbi->s_log_groups_per_flex = 0;
3260 return 1;
3261 }
3262
117fff10
TT
3263 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3264 if (err)
9933fc0a 3265 goto failed;
772cb7c8 3266
772cb7c8 3267 for (i = 0; i < sbi->s_groups_count; i++) {
88b6edd1 3268 gdp = ext4_get_group_desc(sb, i, NULL);
772cb7c8
JS
3269
3270 flex_group = ext4_flex_group(sbi, i);
7c990728
SJS
3271 fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3272 atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
90ba983f 3273 atomic64_add(ext4_free_group_clusters(sb, gdp),
7c990728
SJS
3274 &fg->free_clusters);
3275 atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
772cb7c8
JS
3276 }
3277
3278 return 1;
3279failed:
3280 return 0;
3281}
3282
e2b911c5 3283static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
feb0ab32 3284 struct ext4_group_desc *gdp)
717d50e4 3285{
b47820ed 3286 int offset = offsetof(struct ext4_group_desc, bg_checksum);
717d50e4 3287 __u16 crc = 0;
feb0ab32 3288 __le32 le_group = cpu_to_le32(block_group);
e2b911c5 3289 struct ext4_sb_info *sbi = EXT4_SB(sb);
717d50e4 3290
9aa5d32b 3291 if (ext4_has_metadata_csum(sbi->s_sb)) {
feb0ab32 3292 /* Use new metadata_csum algorithm */
feb0ab32 3293 __u32 csum32;
b47820ed 3294 __u16 dummy_csum = 0;
feb0ab32 3295
feb0ab32
DW
3296 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3297 sizeof(le_group));
b47820ed
DJ
3298 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3299 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3300 sizeof(dummy_csum));
3301 offset += sizeof(dummy_csum);
3302 if (offset < sbi->s_desc_size)
3303 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3304 sbi->s_desc_size - offset);
feb0ab32
DW
3305
3306 crc = csum32 & 0xFFFF;
3307 goto out;
717d50e4
AD
3308 }
3309
feb0ab32 3310 /* old crc16 code */
e2b911c5 3311 if (!ext4_has_feature_gdt_csum(sb))
813d32f9
DW
3312 return 0;
3313
feb0ab32
DW
3314 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3315 crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3316 crc = crc16(crc, (__u8 *)gdp, offset);
3317 offset += sizeof(gdp->bg_checksum); /* skip checksum */
3318 /* for checksum of struct ext4_group_desc do the rest...*/
e2b911c5 3319 if (ext4_has_feature_64bit(sb) &&
feb0ab32
DW
3320 offset < le16_to_cpu(sbi->s_es->s_desc_size))
3321 crc = crc16(crc, (__u8 *)gdp + offset,
3322 le16_to_cpu(sbi->s_es->s_desc_size) -
3323 offset);
3324
3325out:
717d50e4
AD
3326 return cpu_to_le16(crc);
3327}
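/*
 * In short: with metadata_csum the stored value is the low 16 bits of
 * a crc32c seeded with s_csum_seed and folded over the group number
 * and the descriptor with its bg_checksum field treated as zero;
 * with only gdt_csum it is a crc16 over the filesystem UUID, the
 * group number and the descriptor, skipping the bg_checksum field.
 */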
3328
feb0ab32 3329int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
717d50e4
AD
3330 struct ext4_group_desc *gdp)
3331{
feb0ab32 3332 if (ext4_has_group_desc_csum(sb) &&
e2b911c5 3333 (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
717d50e4
AD
3334 return 0;
3335
3336 return 1;
3337}
3338
feb0ab32
DW
3339void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3340 struct ext4_group_desc *gdp)
3341{
3342 if (!ext4_has_group_desc_csum(sb))
3343 return;
e2b911c5 3344 gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
feb0ab32
DW
3345}
3346
ac27a0ec 3347/* Called at mount-time, super-block is locked */
bfff6873 3348static int ext4_check_descriptors(struct super_block *sb,
829fa70d 3349 ext4_fsblk_t sb_block,
bfff6873 3350 ext4_group_t *first_not_zeroed)
ac27a0ec 3351{
617ba13b
MC
3352 struct ext4_sb_info *sbi = EXT4_SB(sb);
3353 ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3354 ext4_fsblk_t last_block;
44de022c 3355 ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
bd81d8ee
LV
3356 ext4_fsblk_t block_bitmap;
3357 ext4_fsblk_t inode_bitmap;
3358 ext4_fsblk_t inode_table;
ce421581 3359 int flexbg_flag = 0;
bfff6873 3360 ext4_group_t i, grp = sbi->s_groups_count;
ac27a0ec 3361
e2b911c5 3362 if (ext4_has_feature_flex_bg(sb))
ce421581
JS
3363 flexbg_flag = 1;
3364
af5bc92d 3365 ext4_debug("Checking group descriptors");
ac27a0ec 3366
197cd65a
AM
3367 for (i = 0; i < sbi->s_groups_count; i++) {
3368 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3369
ce421581 3370 if (i == sbi->s_groups_count - 1 || flexbg_flag)
bd81d8ee 3371 last_block = ext4_blocks_count(sbi->s_es) - 1;
ac27a0ec
DK
3372 else
3373 last_block = first_block +
617ba13b 3374 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
ac27a0ec 3375
bfff6873
LC
3376 if ((grp == sbi->s_groups_count) &&
3377 !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3378 grp = i;
3379
8fadc143 3380 block_bitmap = ext4_block_bitmap(sb, gdp);
829fa70d
TT
3381 if (block_bitmap == sb_block) {
3382 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3383 "Block bitmap for group %u overlaps "
3384 "superblock", i);
18db4b4e
TT
3385 if (!sb_rdonly(sb))
3386 return 0;
829fa70d 3387 }
77260807
TT
3388 if (block_bitmap >= sb_block + 1 &&
3389 block_bitmap <= last_bg_block) {
3390 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3391 "Block bitmap for group %u overlaps "
3392 "block group descriptors", i);
3393 if (!sb_rdonly(sb))
3394 return 0;
3395 }
2b2d6d01 3396 if (block_bitmap < first_block || block_bitmap > last_block) {
b31e1552 3397 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
a9df9a49 3398 "Block bitmap for group %u not in group "
b31e1552 3399 "(block %llu)!", i, block_bitmap);
ac27a0ec
DK
3400 return 0;
3401 }
8fadc143 3402 inode_bitmap = ext4_inode_bitmap(sb, gdp);
829fa70d
TT
3403 if (inode_bitmap == sb_block) {
3404 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3405 "Inode bitmap for group %u overlaps "
3406 "superblock", i);
18db4b4e
TT
3407 if (!sb_rdonly(sb))
3408 return 0;
829fa70d 3409 }
77260807
TT
3410 if (inode_bitmap >= sb_block + 1 &&
3411 inode_bitmap <= last_bg_block) {
3412 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3413 "Inode bitmap for group %u overlaps "
3414 "block group descriptors", i);
3415 if (!sb_rdonly(sb))
3416 return 0;
3417 }
2b2d6d01 3418 if (inode_bitmap < first_block || inode_bitmap > last_block) {
b31e1552 3419 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
a9df9a49 3420 "Inode bitmap for group %u not in group "
b31e1552 3421 "(block %llu)!", i, inode_bitmap);
ac27a0ec
DK
3422 return 0;
3423 }
8fadc143 3424 inode_table = ext4_inode_table(sb, gdp);
829fa70d
TT
3425 if (inode_table == sb_block) {
3426 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3427 "Inode table for group %u overlaps "
3428 "superblock", i);
18db4b4e
TT
3429 if (!sb_rdonly(sb))
3430 return 0;
829fa70d 3431 }
77260807
TT
3432 if (inode_table >= sb_block + 1 &&
3433 inode_table <= last_bg_block) {
3434 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3435 "Inode table for group %u overlaps "
3436 "block group descriptors", i);
3437 if (!sb_rdonly(sb))
3438 return 0;
3439 }
bd81d8ee 3440 if (inode_table < first_block ||
2b2d6d01 3441 inode_table + sbi->s_itb_per_group - 1 > last_block) {
b31e1552 3442 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
a9df9a49 3443 "Inode table for group %u not in group "
b31e1552 3444 "(block %llu)!", i, inode_table);
ac27a0ec
DK
3445 return 0;
3446 }
955ce5f5 3447 ext4_lock_group(sb, i);
feb0ab32 3448 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
b31e1552
ES
3449 ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3450 "Checksum for group %u failed (%u!=%u)",
e2b911c5 3451 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
b31e1552 3452 gdp)), le16_to_cpu(gdp->bg_checksum));
bc98a42c 3453 if (!sb_rdonly(sb)) {
955ce5f5 3454 ext4_unlock_group(sb, i);
8a266467 3455 return 0;
7ee1ec4c 3456 }
717d50e4 3457 }
955ce5f5 3458 ext4_unlock_group(sb, i);
ce421581
JS
3459 if (!flexbg_flag)
3460 first_block += EXT4_BLOCKS_PER_GROUP(sb);
ac27a0ec 3461 }
bfff6873
LC
3462 if (NULL != first_not_zeroed)
3463 *first_not_zeroed = grp;
ac27a0ec
DK
3464 return 1;
3465}
3466
cd2291a4
ES
3467/*
3468 * Maximal extent format file size.
3469 * Resulting logical blkno at s_maxbytes must fit in our on-disk
3470 * extent format containers, within a sector_t, and within i_blocks
3471 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
3472 * so that won't be a limiting factor.
3473 *
f17722f9
LC
3474 * However, there is another limiting factor. We store extents in the form
3475 * of a starting block and a length, hence the resulting length of the extent
3476 * covering the maximum file size must fit into the on-disk format containers
3477 * as well. Given that the length is always one unit bigger than the highest
3478 * unit covered (because we count 0 as well), we have to lower s_maxbytes by one fs block.
3479 *
cd2291a4
ES
3480 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3481 */
f287a1a5 3482static loff_t ext4_max_size(int blkbits, int has_huge_files)
cd2291a4
ES
3483{
3484 loff_t res;
3485 loff_t upper_limit = MAX_LFS_FILESIZE;
3486
72deb455
CH
3487 BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3488
3489 if (!has_huge_files) {
cd2291a4
ES
3490 upper_limit = (1LL << 32) - 1;
3491
3492 /* total blocks in file system block size */
3493 upper_limit >>= (blkbits - 9);
3494 upper_limit <<= blkbits;
3495 }
3496
f17722f9
LC
3497 /*
3498 * 32-bit extent-start container, ee_block. We lower the maxbytes
3499 * by one fs block, so ee_len can cover the extent of maximum file
3500 * size
3501 */
3502 res = (1LL << 32) - 1;
cd2291a4 3503 res <<= blkbits;
cd2291a4
ES
3504
3505 /* Sanity check against vm- & vfs- imposed limits */
3506 if (res > upper_limit)
3507 res = upper_limit;
3508
3509 return res;
3510}
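/*
 * Worked example, assuming 4 KiB blocks (blkbits == 12) and huge
 * files enabled: res = (2^32 - 1) << 12, i.e. 16 TiB minus one
 * block, which on a 64-bit kernel is below MAX_LFS_FILESIZE, so
 * that value becomes s_maxbytes for extent-mapped files.
 */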
ac27a0ec 3511
ac27a0ec 3512/*
cd2291a4 3513 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
0fc1b451
AK
3514 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3515 * We need to be 1 filesystem block less than the 2^48 sector limit.
ac27a0ec 3516 */
f287a1a5 3517static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
ac27a0ec 3518{
75ca6ad4 3519 unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS;
0fc1b451 3520 int meta_blocks;
75ca6ad4
RH
3521
3522 /*
3523 * This is calculated to be the largest file size for a dense, block
0b8e58a1
AD
3524 * mapped file such that the file's total number of 512-byte sectors,
3525 * including data and all indirect blocks, does not exceed (2^48 - 1).
3526 *
3527 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
3528 * number of 512-byte sectors of the file.
0fc1b451 3529 */
72deb455 3530 if (!has_huge_files) {
0fc1b451 3531 /*
72deb455
CH
3532 * !has_huge_files implies that the inode i_block field
3533 * represents total file blocks in 2^32 512-byte sectors ==
3534 * size of vfs inode i_blocks * 8
0fc1b451
AK
3535 */
3536 upper_limit = (1LL << 32) - 1;
3537
3538 /* total blocks in file system block size */
3539 upper_limit >>= (bits - 9);
3540
3541 } else {
8180a562
AK
3542 /*
3543 * We use 48 bit ext4_inode i_blocks
3544 * With EXT4_HUGE_FILE_FL set the i_blocks
3545 * represent total number of blocks in
3546 * file system block size
3547 */
0fc1b451
AK
3548 upper_limit = (1LL << 48) - 1;
3549
0fc1b451
AK
3550 }
3551
3552 /* indirect blocks */
3553 meta_blocks = 1;
3554 /* double indirect blocks */
3555 meta_blocks += 1 + (1LL << (bits-2));
3556 /* triple indirect blocks */
3557 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
3558
3559 upper_limit -= meta_blocks;
3560 upper_limit <<= bits;
ac27a0ec
DK
3561
3562 res += 1LL << (bits-2);
3563 res += 1LL << (2*(bits-2));
3564 res += 1LL << (3*(bits-2));
3565 res <<= bits;
3566 if (res > upper_limit)
3567 res = upper_limit;
0fc1b451
AK
3568
3569 if (res > MAX_LFS_FILESIZE)
3570 res = MAX_LFS_FILESIZE;
3571
75ca6ad4 3572 return (loff_t)res;
ac27a0ec
DK
3573}
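/*
 * Worked example, assuming 4 KiB blocks (bits == 12): the data blocks
 * reachable through the block map are 12 direct + 2^10 single-indirect
 * + 2^20 double-indirect + 2^30 triple-indirect = 1,074,791,436 blocks,
 * roughly 4 TiB, which is why block-mapped (non-extent) files top out
 * at about 4 TiB at this block size.
 */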
3574
617ba13b 3575static ext4_fsblk_t descriptor_loc(struct super_block *sb,
0b8e58a1 3576 ext4_fsblk_t logical_sb_block, int nr)
ac27a0ec 3577{
617ba13b 3578 struct ext4_sb_info *sbi = EXT4_SB(sb);
fd2d4291 3579 ext4_group_t bg, first_meta_bg;
ac27a0ec
DK
3580 int has_super = 0;
3581
3582 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3583
e2b911c5 3584 if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
70bbb3e0 3585 return logical_sb_block + nr + 1;
ac27a0ec 3586 bg = sbi->s_desc_per_block * nr;
617ba13b 3587 if (ext4_bg_has_super(sb, bg))
ac27a0ec 3588 has_super = 1;
0b8e58a1 3589
bd63f6b0
DW
3590 /*
3591 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3592 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
3593 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3594 * compensate.
3595 */
3596 if (sb->s_blocksize == 1024 && nr == 0 &&
49598e04 3597 le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
bd63f6b0
DW
3598 has_super++;
3599
617ba13b 3600 return (has_super + ext4_group_first_block_no(sb, bg));
ac27a0ec
DK
3601}
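/*
 * Example, assuming a non-meta_bg filesystem with 4 KiB blocks and the
 * default superblock location: logical_sb_block is 0, so group
 * descriptor block nr is simply read from block nr + 1, right after
 * the superblock.
 */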
3602
c9de560d
AT
3603/**
3604 * ext4_get_stripe_size: Get the stripe size.
3605 * @sbi: In memory super block info
3606 *
3607 * If the stripe size was specified via a mount option, then
3608 * use the mount option value. If the value specified at mount time is
3609 * greater than the blocks per group, use the super block value.
3610 * If the super block value is also greater than blocks per group, return 0.
3611 * The allocator needs it to be less than blocks per group.
3612 *
3613 */
3614static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3615{
3616 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3617 unsigned long stripe_width =
3618 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3eb08658 3619 int ret;
c9de560d
AT
3620
3621 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3eb08658 3622 ret = sbi->s_stripe;
5469d7c3 3623 else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3eb08658 3624 ret = stripe_width;
5469d7c3 3625 else if (stride && stride <= sbi->s_blocks_per_group)
3eb08658
DE
3626 ret = stride;
3627 else
3628 ret = 0;
c9de560d 3629
3eb08658
DE
3630 /*
3631 * If the stripe width is 1, this makes no sense and
3632 * we set it to 0 to turn off stripe handling code.
3633 */
3634 if (ret <= 1)
3635 ret = 0;
c9de560d 3636
3eb08658 3637 return ret;
c9de560d 3638}
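/*
 * Example with hypothetical values: no stripe= mount option,
 * s_raid_stripe_width == 256 and 32768 blocks per group gives a
 * stripe of 256; if every candidate is zero, exceeds the
 * blocks-per-group limit, or the winner is 1, the result is 0 and
 * stripe-aligned allocation stays disabled.
 */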
ac27a0ec 3639
a13fb1a4
ES
3640/*
3641 * Check whether this filesystem can be mounted based on
3642 * the features present and the RDONLY/RDWR mount requested.
3643 * Returns 1 if this filesystem can be mounted as requested,
3644 * 0 if it cannot be.
3645 */
25c6d98f 3646int ext4_feature_set_ok(struct super_block *sb, int readonly)
a13fb1a4 3647{
e2b911c5 3648 if (ext4_has_unknown_ext4_incompat_features(sb)) {
a13fb1a4
ES
3649 ext4_msg(sb, KERN_ERR,
3650 "Couldn't mount because of "
3651 "unsupported optional features (%x)",
3652 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3653 ~EXT4_FEATURE_INCOMPAT_SUPP));
3654 return 0;
3655 }
3656
c83ad55e
GKB
3657#ifndef CONFIG_UNICODE
3658 if (ext4_has_feature_casefold(sb)) {
3659 ext4_msg(sb, KERN_ERR,
3660 "Filesystem with casefold feature cannot be "
3661 "mounted without CONFIG_UNICODE");
3662 return 0;
3663 }
3664#endif
3665
a13fb1a4
ES
3666 if (readonly)
3667 return 1;
3668
e2b911c5 3669 if (ext4_has_feature_readonly(sb)) {
2cb5cc8b 3670 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
1751e8a6 3671 sb->s_flags |= SB_RDONLY;
2cb5cc8b
DW
3672 return 1;
3673 }
3674
a13fb1a4 3675 /* Check that feature set is OK for a read-write mount */
e2b911c5 3676 if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
a13fb1a4
ES
3677 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3678 "unsupported optional features (%x)",
3679 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3680 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3681 return 0;
3682 }
e2b911c5 3683 if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
bab08ab9
TT
3684 ext4_msg(sb, KERN_ERR,
3685 "Can't support bigalloc feature without "
3686 "extents feature\n");
3687 return 0;
3688 }
7c319d32 3689
9db176bc 3690#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
d65d87a0
TT
3691 if (!readonly && (ext4_has_feature_quota(sb) ||
3692 ext4_has_feature_project(sb))) {
7c319d32 3693 ext4_msg(sb, KERN_ERR,
d65d87a0 3694 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
689c958c
LX
3695 return 0;
3696 }
7c319d32 3697#endif /* CONFIG_QUOTA */
a13fb1a4
ES
3698 return 1;
3699}
3700
66e61a9e
TT
3701/*
3702 * This function is called once a day if we have errors logged
3703 * on the file system
3704 */
235699a8 3705static void print_daily_error_info(struct timer_list *t)
66e61a9e 3706{
235699a8
KC
3707 struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3708 struct super_block *sb = sbi->s_sb;
3709 struct ext4_super_block *es = sbi->s_es;
66e61a9e
TT
3710
3711 if (es->s_error_count)
ae0f78de
TT
3712 /* fsck newer than v1.41.13 is needed to clean this condition. */
3713 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
66e61a9e
TT
3714 le32_to_cpu(es->s_error_count));
3715 if (es->s_first_error_time) {
6a0678a7
AB
3716 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3717 sb->s_id,
3718 ext4_get_tstamp(es, s_first_error_time),
66e61a9e
TT
3719 (int) sizeof(es->s_first_error_func),
3720 es->s_first_error_func,
3721 le32_to_cpu(es->s_first_error_line));
3722 if (es->s_first_error_ino)
651e1c3b 3723 printk(KERN_CONT ": inode %u",
66e61a9e
TT
3724 le32_to_cpu(es->s_first_error_ino));
3725 if (es->s_first_error_block)
651e1c3b 3726 printk(KERN_CONT ": block %llu", (unsigned long long)
66e61a9e 3727 le64_to_cpu(es->s_first_error_block));
651e1c3b 3728 printk(KERN_CONT "\n");
66e61a9e
TT
3729 }
3730 if (es->s_last_error_time) {
6a0678a7
AB
3731 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3732 sb->s_id,
3733 ext4_get_tstamp(es, s_last_error_time),
66e61a9e
TT
3734 (int) sizeof(es->s_last_error_func),
3735 es->s_last_error_func,
3736 le32_to_cpu(es->s_last_error_line));
3737 if (es->s_last_error_ino)
651e1c3b 3738 printk(KERN_CONT ": inode %u",
66e61a9e
TT
3739 le32_to_cpu(es->s_last_error_ino));
3740 if (es->s_last_error_block)
651e1c3b 3741 printk(KERN_CONT ": block %llu", (unsigned long long)
66e61a9e 3742 le64_to_cpu(es->s_last_error_block));
651e1c3b 3743 printk(KERN_CONT "\n");
66e61a9e
TT
3744 }
3745 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
3746}
3747
bfff6873
LC
3748/* Find next suitable group and run ext4_init_inode_table */
3749static int ext4_run_li_request(struct ext4_li_request *elr)
3750{
3751 struct ext4_group_desc *gdp = NULL;
3d392b26
TT
3752 struct super_block *sb = elr->lr_super;
3753 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3754 ext4_group_t group = elr->lr_next_group;
3d392b26 3755 unsigned int prefetch_ios = 0;
bfff6873 3756 int ret = 0;
39fec688 3757 u64 start_time;
bfff6873 3758
3d392b26
TT
3759 if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3760 elr->lr_next_group = ext4_mb_prefetch(sb, group,
3761 EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
3762 if (prefetch_ios)
3763 ext4_mb_prefetch_fini(sb, elr->lr_next_group,
3764 prefetch_ios);
3765 trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
3766 prefetch_ios);
3767 if (group >= elr->lr_next_group) {
3768 ret = 1;
3769 if (elr->lr_first_not_zeroed != ngroups &&
3770 !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3771 elr->lr_next_group = elr->lr_first_not_zeroed;
3772 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3773 ret = 0;
3774 }
3775 }
3776 return ret;
3777 }
bfff6873 3778
3d392b26 3779 for (; group < ngroups; group++) {
bfff6873
LC
3780 gdp = ext4_get_group_desc(sb, group, NULL);
3781 if (!gdp) {
3782 ret = 1;
3783 break;
3784 }
3785
3786 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3787 break;
3788 }
3789
7f511862 3790 if (group >= ngroups)
bfff6873
LC
3791 ret = 1;
3792
3793 if (!ret) {
39fec688 3794 start_time = ktime_get_real_ns();
bfff6873
LC
3795 ret = ext4_init_inode_table(sb, group,
3796 elr->lr_timeout ? 0 : 1);
3d392b26 3797 trace_ext4_lazy_itable_init(sb, group);
bfff6873 3798 if (elr->lr_timeout == 0) {
39fec688
SX
3799 elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3800 EXT4_SB(elr->lr_super)->s_li_wait_mult);
bfff6873
LC
3801 }
3802 elr->lr_next_sched = jiffies + elr->lr_timeout;
3803 elr->lr_next_group = group + 1;
3804 }
bfff6873
LC
3805 return ret;
3806}
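/*
 * Scheduling example, assuming the default s_li_wait_mult
 * (EXT4_DEF_LI_WAIT_MULT == 10): if zeroing one inode table took
 * 5 ms, lr_timeout becomes roughly 50 ms worth of jiffies, so the
 * lazyinit thread sleeps about ten times longer than it spends
 * initializing.
 */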
3807
3808/*
3809 * Remove lr_request from the request list and free the
4ed5c033 3810 * request structure. Should be called with li_list_mtx held.
bfff6873
LC
3811 */
3812static void ext4_remove_li_request(struct ext4_li_request *elr)
3813{
bfff6873
LC
3814 if (!elr)
3815 return;
3816
bfff6873 3817 list_del(&elr->lr_request);
3d392b26 3818 EXT4_SB(elr->lr_super)->s_li_request = NULL;
bfff6873
LC
3819 kfree(elr);
3820}
3821
3822static void ext4_unregister_li_request(struct super_block *sb)
3823{
1bb933fb
LC
3824 mutex_lock(&ext4_li_mtx);
3825 if (!ext4_li_info) {
3826 mutex_unlock(&ext4_li_mtx);
bfff6873 3827 return;
1bb933fb 3828 }
bfff6873
LC
3829
3830 mutex_lock(&ext4_li_info->li_list_mtx);
1bb933fb 3831 ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
bfff6873 3832 mutex_unlock(&ext4_li_info->li_list_mtx);
1bb933fb 3833 mutex_unlock(&ext4_li_mtx);
bfff6873
LC
3834}
3835
8f1f7453
ES
3836static struct task_struct *ext4_lazyinit_task;
3837
bfff6873
LC
3838/*
3839 * This is the function where the ext4lazyinit thread lives. It walks
3840 * through the request list searching for the next scheduled filesystem.
3841 * When such a fs is found, it runs the lazy initialization request
3842 * (ext4_run_li_request) and keeps track of the time spent in this
3843 * function. Based on that time we compute the next schedule time of
3844 * the request. When walking through the list is complete, it computes
3845 * the next wakeup time and puts itself to sleep.
3846 */
3847static int ext4_lazyinit_thread(void *arg)
3848{
3849 struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
3850 struct list_head *pos, *n;
3851 struct ext4_li_request *elr;
4ed5c033 3852 unsigned long next_wakeup, cur;
bfff6873
LC
3853
3854 BUG_ON(NULL == eli);
3855
bfff6873
LC
3856cont_thread:
3857 while (true) {
3858 next_wakeup = MAX_JIFFY_OFFSET;
3859
3860 mutex_lock(&eli->li_list_mtx);
3861 if (list_empty(&eli->li_request_list)) {
3862 mutex_unlock(&eli->li_list_mtx);
3863 goto exit_thread;
3864 }
bfff6873 3865 list_for_each_safe(pos, n, &eli->li_request_list) {
e22834f0
DM
3866 int err = 0;
3867 int progress = 0;
bfff6873
LC
3868 elr = list_entry(pos, struct ext4_li_request,
3869 lr_request);
3870
e22834f0
DM
3871 if (time_before(jiffies, elr->lr_next_sched)) {
3872 if (time_before(elr->lr_next_sched, next_wakeup))
3873 next_wakeup = elr->lr_next_sched;
3874 continue;
3875 }
3876 if (down_read_trylock(&elr->lr_super->s_umount)) {
3877 if (sb_start_write_trylock(elr->lr_super)) {
3878 progress = 1;
3879 /*
3880 * We hold sb->s_umount, sb can not
3881 * be removed from the list, it is
3882 * now safe to drop li_list_mtx
3883 */
3884 mutex_unlock(&eli->li_list_mtx);
3885 err = ext4_run_li_request(elr);
3886 sb_end_write(elr->lr_super);
3887 mutex_lock(&eli->li_list_mtx);
3888 n = pos->next;
b2c78cd0 3889 }
e22834f0
DM
3890 up_read((&elr->lr_super->s_umount));
3891 }
3892 /* error, remove the lazy_init job */
3893 if (err) {
3894 ext4_remove_li_request(elr);
3895 continue;
3896 }
3897 if (!progress) {
3898 elr->lr_next_sched = jiffies +
3899 (prandom_u32()
3900 % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
bfff6873 3901 }
bfff6873
LC
3902 if (time_before(elr->lr_next_sched, next_wakeup))
3903 next_wakeup = elr->lr_next_sched;
3904 }
3905 mutex_unlock(&eli->li_list_mtx);
3906
a0acae0e 3907 try_to_freeze();
bfff6873 3908
4ed5c033
LC
3909 cur = jiffies;
3910 if ((time_after_eq(cur, next_wakeup)) ||
f4245bd4 3911 (MAX_JIFFY_OFFSET == next_wakeup)) {
bfff6873
LC
3912 cond_resched();
3913 continue;
3914 }
3915
4ed5c033
LC
3916 schedule_timeout_interruptible(next_wakeup - cur);
3917
8f1f7453
ES
3918 if (kthread_should_stop()) {
3919 ext4_clear_request_list();
3920 goto exit_thread;
3921 }
bfff6873
LC
3922 }
3923
3924exit_thread:
3925 /*
3926 * It looks like the request list is empty, but we need
3927 * to check it under the li_list_mtx lock, to prevent any
3928 * additions into it, and of course we should lock ext4_li_mtx
3929 * to atomically free the list and ext4_li_info, because at
3930 * this point another ext4 filesystem could be registering a
3931 * new one.
3932 */
3933 mutex_lock(&ext4_li_mtx);
3934 mutex_lock(&eli->li_list_mtx);
3935 if (!list_empty(&eli->li_request_list)) {
3936 mutex_unlock(&eli->li_list_mtx);
3937 mutex_unlock(&ext4_li_mtx);
3938 goto cont_thread;
3939 }
3940 mutex_unlock(&eli->li_list_mtx);
bfff6873
LC
3941 kfree(ext4_li_info);
3942 ext4_li_info = NULL;
3943 mutex_unlock(&ext4_li_mtx);
3944
3945 return 0;
3946}
3947
3948static void ext4_clear_request_list(void)
3949{
3950 struct list_head *pos, *n;
3951 struct ext4_li_request *elr;
3952
3953 mutex_lock(&ext4_li_info->li_list_mtx);
bfff6873
LC
3954 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3955 elr = list_entry(pos, struct ext4_li_request,
3956 lr_request);
3957 ext4_remove_li_request(elr);
3958 }
3959 mutex_unlock(&ext4_li_info->li_list_mtx);
3960}
3961
3962static int ext4_run_lazyinit_thread(void)
3963{
8f1f7453
ES
3964 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3965 ext4_li_info, "ext4lazyinit");
3966 if (IS_ERR(ext4_lazyinit_task)) {
3967 int err = PTR_ERR(ext4_lazyinit_task);
bfff6873 3968 ext4_clear_request_list();
bfff6873
LC
3969 kfree(ext4_li_info);
3970 ext4_li_info = NULL;
92b97816 3971 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
bfff6873
LC
3972 "initialization thread\n",
3973 err);
3974 return err;
3975 }
3976 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
bfff6873
LC
3977 return 0;
3978}
3979
3980/*
3981 * Check whether it makes sense to run the itable init thread or not.
3982 * If there is at least one uninitialized inode table, return the
3983 * corresponding group number, else the loop goes through all
3984 * groups and returns the total number of groups.
3985 */
3986static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3987{
3988 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3989 struct ext4_group_desc *gdp = NULL;
3990
8844618d
TT
3991 if (!ext4_has_group_desc_csum(sb))
3992 return ngroups;
3993
bfff6873
LC
3994 for (group = 0; group < ngroups; group++) {
3995 gdp = ext4_get_group_desc(sb, group, NULL);
3996 if (!gdp)
3997 continue;
3998
50122847 3999 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
bfff6873
LC
4000 break;
4001 }
4002
4003 return group;
4004}
4005
4006static int ext4_li_info_new(void)
4007{
4008 struct ext4_lazy_init *eli = NULL;
4009
4010 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
4011 if (!eli)
4012 return -ENOMEM;
4013
bfff6873
LC
4014 INIT_LIST_HEAD(&eli->li_request_list);
4015 mutex_init(&eli->li_list_mtx);
4016
bfff6873
LC
4017 eli->li_state |= EXT4_LAZYINIT_QUIT;
4018
4019 ext4_li_info = eli;
4020
4021 return 0;
4022}
4023
4024static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
4025 ext4_group_t start)
4026{
bfff6873 4027 struct ext4_li_request *elr;
bfff6873
LC
4028
4029 elr = kzalloc(sizeof(*elr), GFP_KERNEL);
4030 if (!elr)
4031 return NULL;
4032
4033 elr->lr_super = sb;
3d392b26 4034 elr->lr_first_not_zeroed = start;
21175ca4 4035 if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3d392b26
TT
4036 elr->lr_mode = EXT4_LI_MODE_ITABLE;
4037 elr->lr_next_group = start;
21175ca4
HS
4038 } else {
4039 elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3d392b26 4040 }
bfff6873
LC
4041
4042 /*
4043 * Randomize first schedule time of the request to
4044 * spread the inode table initialization requests
4045 * better.
4046 */
dd1f723b
TT
4047 elr->lr_next_sched = jiffies + (prandom_u32() %
4048 (EXT4_DEF_LI_MAX_START_DELAY * HZ));
bfff6873
LC
4049 return elr;
4050}
4051
7f511862
TT
4052int ext4_register_li_request(struct super_block *sb,
4053 ext4_group_t first_not_zeroed)
bfff6873
LC
4054{
4055 struct ext4_sb_info *sbi = EXT4_SB(sb);
7f511862 4056 struct ext4_li_request *elr = NULL;
49598e04 4057 ext4_group_t ngroups = sbi->s_groups_count;
6c5a6cb9 4058 int ret = 0;
bfff6873 4059
7f511862 4060 mutex_lock(&ext4_li_mtx);
51ce6511
LC
4061 if (sbi->s_li_request != NULL) {
4062 /*
4063 * Reset timeout so it can be computed again, because
4064 * s_li_wait_mult might have changed.
4065 */
4066 sbi->s_li_request->lr_timeout = 0;
7f511862 4067 goto out;
51ce6511 4068 }
bfff6873 4069
21175ca4 4070 if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3d392b26
TT
4071 (first_not_zeroed == ngroups || sb_rdonly(sb) ||
4072 !test_opt(sb, INIT_INODE_TABLE)))
7f511862 4073 goto out;
bfff6873
LC
4074
4075 elr = ext4_li_request_new(sb, first_not_zeroed);
7f511862
TT
4076 if (!elr) {
4077 ret = -ENOMEM;
4078 goto out;
4079 }
bfff6873
LC
4080
4081 if (NULL == ext4_li_info) {
4082 ret = ext4_li_info_new();
4083 if (ret)
4084 goto out;
4085 }
4086
4087 mutex_lock(&ext4_li_info->li_list_mtx);
4088 list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4089 mutex_unlock(&ext4_li_info->li_list_mtx);
4090
4091 sbi->s_li_request = elr;
46e4690b
TM
4092 /*
4093 * set elr to NULL here since it has been inserted into
4094 * the request_list, and its removal and freeing are
4095 * handled by ext4_clear_request_list from now on.
4096 */
4097 elr = NULL;
bfff6873
LC
4098
4099 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4100 ret = ext4_run_lazyinit_thread();
4101 if (ret)
4102 goto out;
4103 }
bfff6873 4104out:
beed5ecb
NK
4105 mutex_unlock(&ext4_li_mtx);
4106 if (ret)
bfff6873 4107 kfree(elr);
bfff6873
LC
4108 return ret;
4109}
4110
4111/*
4112 * We do not need to lock anything since this is called on
4113 * module unload.
4114 */
4115static void ext4_destroy_lazyinit_thread(void)
4116{
4117 /*
4118 * If thread exited earlier
4119 * there's nothing to be done.
4120 */
8f1f7453 4121 if (!ext4_li_info || !ext4_lazyinit_task)
bfff6873
LC
4122 return;
4123
8f1f7453 4124 kthread_stop(ext4_lazyinit_task);
bfff6873
LC
4125}
4126
25ed6e8a
DW
4127static int set_journal_csum_feature_set(struct super_block *sb)
4128{
4129 int ret = 1;
4130 int compat, incompat;
4131 struct ext4_sb_info *sbi = EXT4_SB(sb);
4132
9aa5d32b 4133 if (ext4_has_metadata_csum(sb)) {
db9ee220 4134 /* journal checksum v3 */
25ed6e8a 4135 compat = 0;
db9ee220 4136 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
25ed6e8a
DW
4137 } else {
4138 /* journal checksum v1 */
4139 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4140 incompat = 0;
4141 }
4142
feb8c6d3
DW
4143 jbd2_journal_clear_features(sbi->s_journal,
4144 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4145 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4146 JBD2_FEATURE_INCOMPAT_CSUM_V2);
25ed6e8a
DW
4147 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4148 ret = jbd2_journal_set_features(sbi->s_journal,
4149 compat, 0,
4150 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4151 incompat);
4152 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4153 ret = jbd2_journal_set_features(sbi->s_journal,
4154 compat, 0,
4155 incompat);
4156 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4157 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4158 } else {
feb8c6d3
DW
4159 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4160 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
25ed6e8a
DW
4161 }
4162
4163 return ret;
4164}
4165
952fc18e
TT
4166/*
4167 * Note: calculating the overhead so we can be compatible with
4168 * historical BSD practice is quite difficult in the face of
4169 * clusters/bigalloc. This is because multiple metadata blocks from
4170 * different block groups can end up in the same allocation cluster.
4171 * Calculating the exact overhead in the face of clustered allocation
4172 * requires either O(all block bitmaps) in memory or O(number of block
4173 * groups**2) in time. We will still calculate the superblock for
4174 * older file systems --- and if we come across a bigalloc file
4175 * system with zero in s_overhead_clusters the estimate will be close to
4176 * correct especially for very large cluster sizes --- but for newer
4177 * file systems, it's better to calculate this figure once at mkfs
4178 * time, and store it in the superblock. If the superblock value is
4179 * present (even for non-bigalloc file systems), we will use it.
4180 */
4181static int count_overhead(struct super_block *sb, ext4_group_t grp,
4182 char *buf)
4183{
4184 struct ext4_sb_info *sbi = EXT4_SB(sb);
4185 struct ext4_group_desc *gdp;
4186 ext4_fsblk_t first_block, last_block, b;
4187 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4188 int s, j, count = 0;
4189
e2b911c5 4190 if (!ext4_has_feature_bigalloc(sb))
0548bbb8
TT
4191 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
4192 sbi->s_itb_per_group + 2);
4193
952fc18e
TT
4194 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4195 (grp * EXT4_BLOCKS_PER_GROUP(sb));
4196 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4197 for (i = 0; i < ngroups; i++) {
4198 gdp = ext4_get_group_desc(sb, i, NULL);
4199 b = ext4_block_bitmap(sb, gdp);
4200 if (b >= first_block && b <= last_block) {
4201 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4202 count++;
4203 }
4204 b = ext4_inode_bitmap(sb, gdp);
4205 if (b >= first_block && b <= last_block) {
4206 ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4207 count++;
4208 }
4209 b = ext4_inode_table(sb, gdp);
4210 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4211 for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4212 int c = EXT4_B2C(sbi, b - first_block);
4213 ext4_set_bit(c, buf);
4214 count++;
4215 }
4216 if (i != grp)
4217 continue;
4218 s = 0;
4219 if (ext4_bg_has_super(sb, grp)) {
4220 ext4_set_bit(s++, buf);
4221 count++;
4222 }
c48ae41b
TT
4223 j = ext4_bg_num_gdb(sb, grp);
4224 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4225 ext4_error(sb, "Invalid number of block group "
4226 "descriptor blocks: %d", j);
4227 j = EXT4_BLOCKS_PER_GROUP(sb) - s;
952fc18e 4228 }
c48ae41b
TT
4229 count += j;
4230 for (; j > 0; j--)
4231 ext4_set_bit(EXT4_B2C(sbi, s++), buf);
952fc18e
TT
4232 }
4233 if (!count)
4234 return 0;
4235 return EXT4_CLUSTERS_PER_GROUP(sb) -
4236 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4237}
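/*
 * Rough example for the non-bigalloc fast path above: a group that
 * carries a superblock backup, one descriptor block and, say, a
 * 512-block inode table accounts for 1 + 1 + 512 + 2 (block and
 * inode bitmaps) = 516 overhead blocks; the exact descriptor count
 * comes from ext4_bg_num_gdb().
 */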
4238
4239/*
4240 * Compute the overhead and stash it in sbi->s_overhead
4241 */
4242int ext4_calculate_overhead(struct super_block *sb)
4243{
4244 struct ext4_sb_info *sbi = EXT4_SB(sb);
4245 struct ext4_super_block *es = sbi->s_es;
3c816ded
EW
4246 struct inode *j_inode;
4247 unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
952fc18e
TT
4248 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4249 ext4_fsblk_t overhead = 0;
4fdb5543 4250 char *buf = (char *) get_zeroed_page(GFP_NOFS);
952fc18e 4251
952fc18e
TT
4252 if (!buf)
4253 return -ENOMEM;
4254
4255 /*
4256 * Compute the overhead (FS structures). This is constant
4257 * for a given filesystem unless the number of block groups
4258 * changes so we cache the previous value until it does.
4259 */
4260
4261 /*
4262 * All of the blocks before first_data_block are overhead
4263 */
4264 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4265
4266 /*
4267 * Add the overhead found in each block group
4268 */
4269 for (i = 0; i < ngroups; i++) {
4270 int blks;
4271
4272 blks = count_overhead(sb, i, buf);
4273 overhead += blks;
4274 if (blks)
4275 memset(buf, 0, PAGE_SIZE);
4276 cond_resched();
4277 }
3c816ded
EW
4278
4279 /*
4280 * Add the internal journal blocks whether the journal has been
4281 * loaded or not
4282 */
ee7ed3aa 4283 if (sbi->s_journal && !sbi->s_journal_bdev)
ede7dc7f 4284 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
f1eec3b0
RH
4285 else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4286 /* j_inum for internal journal is non-zero */
3c816ded
EW
4287 j_inode = ext4_get_journal_inode(sb, j_inum);
4288 if (j_inode) {
4289 j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4290 overhead += EXT4_NUM_B2C(sbi, j_blocks);
4291 iput(j_inode);
4292 } else {
4293 ext4_msg(sb, KERN_ERR, "can't get journal size");
4294 }
4295 }
952fc18e
TT
4296 sbi->s_overhead = overhead;
4297 smp_wmb();
4298 free_page((unsigned long) buf);
4299 return 0;
4300}
4301
b5799018 4302static void ext4_set_resv_clusters(struct super_block *sb)
27dd4385
LC
4303{
4304 ext4_fsblk_t resv_clusters;
b5799018 4305 struct ext4_sb_info *sbi = EXT4_SB(sb);
27dd4385 4306
30fac0f7
JK
4307 /*
4308 * There's no need to reserve anything when we aren't using extents.
4309 * The space estimates are exact, there are no unwritten extents,
4310 * hole punching doesn't need new metadata... This is needed especially
4311 * to keep ext2/3 backward compatibility.
4312 */
e2b911c5 4313 if (!ext4_has_feature_extents(sb))
b5799018 4314 return;
27dd4385
LC
4315 /*
4316 * By default we reserve 2% or 4096 clusters, whichever is smaller.
4317 * This should cover the situations where we can not afford to run
4318 * out of space like for example punch hole, or converting
556615dc 4319 * unwritten extents in delalloc path. In most cases such
27dd4385
LC
4320 * allocation would require 1, or 2 blocks, higher numbers are
4321 * very rare.
4322 */
b5799018
TT
4323 resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4324 sbi->s_cluster_bits);
27dd4385
LC
4325
4326 do_div(resv_clusters, 50);
4327 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4328
b5799018 4329 atomic64_set(&sbi->s_resv_clusters, resv_clusters);
27dd4385
LC
4330}
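/*
 * Example, assuming a 1 TiB filesystem with 4 KiB clusters: that is
 * 268,435,456 clusters, 2% of which is about 5.3 million, so the
 * reservation is capped at the 4096-cluster (16 MiB) ceiling.
 */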
4331
ca9b404f
RA
4332static const char *ext4_quota_mode(struct super_block *sb)
4333{
4334#ifdef CONFIG_QUOTA
4335 if (!ext4_quota_capable(sb))
4336 return "none";
4337
4338 if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
4339 return "journalled";
4340 else
4341 return "writeback";
4342#else
4343 return "disabled";
4344#endif
4345}
4346
188c299e
JK
4347static void ext4_setup_csum_trigger(struct super_block *sb,
4348 enum ext4_journal_trigger_type type,
4349 void (*trigger)(
4350 struct jbd2_buffer_trigger_type *type,
4351 struct buffer_head *bh,
4352 void *mapped_data,
4353 size_t size))
4354{
4355 struct ext4_sb_info *sbi = EXT4_SB(sb);
4356
4357 sbi->s_journal_triggers[type].sb = sb;
4358 sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4359}
4360
2b2d6d01 4361static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ac27a0ec 4362{
5e405595 4363 struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
d4c402d9 4364 char *orig_data = kstrdup(data, GFP_KERNEL);
1d0c3924 4365 struct buffer_head *bh, **group_desc;
617ba13b 4366 struct ext4_super_block *es = NULL;
5aee0f8a 4367 struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
7c990728 4368 struct flex_groups **flex_groups;
617ba13b
MC
4369 ext4_fsblk_t block;
4370 ext4_fsblk_t sb_block = get_sb_block(&data);
70bbb3e0 4371 ext4_fsblk_t logical_sb_block;
ac27a0ec 4372 unsigned long offset = 0;
ac27a0ec
DK
4373 unsigned long def_mount_opts;
4374 struct inode *root;
0390131b 4375 const char *descr;
dcc7dae3 4376 int ret = -ENOMEM;
281b5995 4377 int blocksize, clustersize;
4ec11028
TT
4378 unsigned int db_count;
4379 unsigned int i;
ef5fd681 4380 int needs_recovery, has_huge_files;
bd81d8ee 4381 __u64 blocks_count;
07aa2ea1 4382 int err = 0;
bfff6873 4383 ext4_group_t first_not_zeroed;
e6e268cb 4384 struct ext4_fs_context parsed_opts = {0};
b237e304
HS
4385
4386 /* Set defaults for the variables that will be set during parsing */
4387 parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4388 parsed_opts.journal_devnum = 0;
196e402a 4389 parsed_opts.mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN;
ac27a0ec 4390
5aee0f8a
TT
4391 if ((data && !orig_data) || !sbi)
4392 goto out_free_base;
705895b6 4393
aed9eb1b 4394 sbi->s_daxdev = dax_dev;
705895b6
PE
4395 sbi->s_blockgroup_lock =
4396 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
5aee0f8a
TT
4397 if (!sbi->s_blockgroup_lock)
4398 goto out_free_base;
4399
ac27a0ec 4400 sb->s_fs_info = sbi;
2c0544b2 4401 sbi->s_sb = sb;
240799cd 4402 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
d9c9bef1 4403 sbi->s_sb_block = sb_block;
8446fe92
CH
4404 sbi->s_sectors_written_start =
4405 part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
ac27a0ec 4406
9f6200bb 4407 /* Cleanup superblock name */
ec3904dc 4408 strreplace(sb->s_id, '/', '!');
9f6200bb 4409
07aa2ea1 4410 /* -EINVAL is default */
dcc7dae3 4411 ret = -EINVAL;
617ba13b 4412 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
ac27a0ec 4413 if (!blocksize) {
b31e1552 4414 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
ac27a0ec
DK
4415 goto out_fail;
4416 }
4417
4418 /*
617ba13b 4419 * The ext4 superblock will not be buffer aligned for other than 1kB
ac27a0ec
DK
4420 * block sizes. We need to calculate the offset from buffer start.
4421 */
617ba13b 4422 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
70bbb3e0
AM
4423 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
4424 offset = do_div(logical_sb_block, blocksize);
ac27a0ec 4425 } else {
70bbb3e0 4426 logical_sb_block = sb_block;
ac27a0ec
DK
4427 }
4428
8394a6ab 4429 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4430 if (IS_ERR(bh)) {
b31e1552 4431 ext4_msg(sb, KERN_ERR, "unable to read superblock");
8394a6ab 4432 ret = PTR_ERR(bh);
ac27a0ec
DK
4433 goto out_fail;
4434 }
4435 /*
4436 * Note: s_es must be initialized as soon as possible because
617ba13b 4437 * some ext4 macros depend on its value.
ac27a0ec 4438 */
2716b802 4439 es = (struct ext4_super_block *) (bh->b_data + offset);
ac27a0ec
DK
4440 sbi->s_es = es;
4441 sb->s_magic = le16_to_cpu(es->s_magic);
617ba13b
MC
4442 if (sb->s_magic != EXT4_SUPER_MAGIC)
4443 goto cantfind_ext4;
afc32f7e 4444 sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
ac27a0ec 4445
feb0ab32 4446 /* Warn if metadata_csum and gdt_csum are both set. */
e2b911c5
DW
4447 if (ext4_has_feature_metadata_csum(sb) &&
4448 ext4_has_feature_gdt_csum(sb))
363307e6 4449 ext4_warning(sb, "metadata_csum and uninit_bg are "
feb0ab32
DW
4450 "redundant flags; please run fsck.");
4451
d25425f8
DW
4452 /* Check for a known checksum algorithm */
4453 if (!ext4_verify_csum_type(sb, es)) {
4454 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4455 "unknown checksum algorithm.");
4456 silent = 1;
4457 goto cantfind_ext4;
4458 }
02f310fc
JK
4459 ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4460 ext4_orphan_file_block_trigger);
d25425f8 4461
0441984a 4462 /* Load the checksum driver */
a45403b5
TT
4463 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4464 if (IS_ERR(sbi->s_chksum_driver)) {
4465 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4466 ret = PTR_ERR(sbi->s_chksum_driver);
4467 sbi->s_chksum_driver = NULL;
4468 goto failed_mount;
0441984a
DW
4469 }
4470
a9c47317
DW
4471 /* Check superblock checksum */
4472 if (!ext4_superblock_csum_verify(sb, es)) {
4473 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4474 "invalid superblock checksum. Run e2fsck?");
4475 silent = 1;
6a797d27 4476 ret = -EFSBADCRC;
a9c47317
DW
4477 goto cantfind_ext4;
4478 }
4479
4480 /* Precompute checksum seed for all metadata */
e2b911c5 4481 if (ext4_has_feature_csum_seed(sb))
8c81bd8f 4482 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
dec214d0 4483 else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
a9c47317
DW
4484 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4485 sizeof(es->s_uuid));
4486
ac27a0ec
DK
4487 /* Set defaults before we parse the mount options */
4488 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
fd8c37ec 4489 set_opt(sb, INIT_INODE_TABLE);
617ba13b 4490 if (def_mount_opts & EXT4_DEFM_DEBUG)
fd8c37ec 4491 set_opt(sb, DEBUG);
87f26807 4492 if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
fd8c37ec 4493 set_opt(sb, GRPID);
617ba13b 4494 if (def_mount_opts & EXT4_DEFM_UID16)
fd8c37ec 4495 set_opt(sb, NO_UID32);
ea663336 4496 /* xattr user namespace & acls are now defaulted on */
ea663336 4497 set_opt(sb, XATTR_USER);
03010a33 4498#ifdef CONFIG_EXT4_FS_POSIX_ACL
ea663336 4499 set_opt(sb, POSIX_ACL);
2e7842b8 4500#endif
995a3ed6
HS
4501 if (ext4_has_feature_fast_commit(sb))
4502 set_opt2(sb, JOURNAL_FAST_COMMIT);
98c1a759
DW
4503 /* don't forget to enable journal_csum when metadata_csum is enabled. */
4504 if (ext4_has_metadata_csum(sb))
4505 set_opt(sb, JOURNAL_CHECKSUM);
4506
617ba13b 4507 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
fd8c37ec 4508 set_opt(sb, JOURNAL_DATA);
617ba13b 4509 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
fd8c37ec 4510 set_opt(sb, ORDERED_DATA);
617ba13b 4511 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
fd8c37ec 4512 set_opt(sb, WRITEBACK_DATA);
617ba13b
MC
4513
4514 if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
fd8c37ec 4515 set_opt(sb, ERRORS_PANIC);
bb4f397a 4516 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
fd8c37ec 4517 set_opt(sb, ERRORS_CONT);
bb4f397a 4518 else
fd8c37ec 4519 set_opt(sb, ERRORS_RO);
45f1a9c3
DW
4520 /* block_validity enabled by default; disable with noblock_validity */
4521 set_opt(sb, BLOCK_VALIDITY);
8b67f04a 4522 if (def_mount_opts & EXT4_DEFM_DISCARD)
fd8c37ec 4523 set_opt(sb, DISCARD);
ac27a0ec 4524
08cefc7a
EB
4525 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
4526 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
30773840
TT
4527 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
4528 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
4529 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
ac27a0ec 4530
8b67f04a 4531 if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
fd8c37ec 4532 set_opt(sb, BARRIER);
ac27a0ec 4533
dd919b98
AK
4534 /*
4535 * Enable delayed allocation by default.
4536 * Use -o nodelalloc to turn it off.
4537 */
bc0b75f7 4538 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
8b67f04a 4539 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
fd8c37ec 4540 set_opt(sb, DELALLOC);
dd919b98 4541
51ce6511
LC
4542 /*
4543 * set the default s_li_wait_mult for lazyinit, in case no
4544 * mount option is specified.
4545 */
4546 sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
4547
c9200760
TT
4548 if (le32_to_cpu(es->s_log_block_size) >
4549 (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4f97a681 4550 ext4_msg(sb, KERN_ERR,
c9200760
TT
4551 "Invalid log block size: %u",
4552 le32_to_cpu(es->s_log_block_size));
4f97a681
TT
4553 goto failed_mount;
4554 }
c9200760
TT
4555 if (le32_to_cpu(es->s_log_cluster_size) >
4556 (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4f97a681 4557 ext4_msg(sb, KERN_ERR,
c9200760
TT
4558 "Invalid log cluster size: %u",
4559 le32_to_cpu(es->s_log_cluster_size));
4f97a681
TT
4560 goto failed_mount;
4561 }
4562
c9200760
TT
4563 blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
4564
4565 if (blocksize == PAGE_SIZE)
4566 set_opt(sb, DIOREAD_NOLOCK);
4f97a681 4567
9803387c
TT
4568 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4569 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4570 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4571 } else {
4572 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4573 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4574 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4575 ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4576 sbi->s_first_ino);
4577 goto failed_mount;
4578 }
4579 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4580 (!is_power_of_2(sbi->s_inode_size)) ||
4581 (sbi->s_inode_size > blocksize)) {
4582 ext4_msg(sb, KERN_ERR,
4583 "unsupported inode size: %d",
4584 sbi->s_inode_size);
4f97a681 4585 ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize);
9803387c
TT
4586 goto failed_mount;
4587 }
4588 /*
4589 * i_atime_extra is the last extra field available for
4590 * [acm]times in struct ext4_inode. Checking for that
4591 * field should suffice to ensure we have extra space
4592 * for all three.
4593 */
4594 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4595 sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4596 sb->s_time_gran = 1;
4597 sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4598 } else {
4599 sb->s_time_gran = NSEC_PER_SEC;
4600 sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4601 }
4602 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4603 }
4604 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4605 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4606 EXT4_GOOD_OLD_INODE_SIZE;
4607 if (ext4_has_feature_extra_isize(sb)) {
4608 unsigned v, max = (sbi->s_inode_size -
4609 EXT4_GOOD_OLD_INODE_SIZE);
4610
4611 v = le16_to_cpu(es->s_want_extra_isize);
4612 if (v > max) {
4613 ext4_msg(sb, KERN_ERR,
4614 "bad s_want_extra_isize: %d", v);
4615 goto failed_mount;
4616 }
4617 if (sbi->s_want_extra_isize < v)
4618 sbi->s_want_extra_isize = v;
4619
4620 v = le16_to_cpu(es->s_min_extra_isize);
4621 if (v > max) {
4622 ext4_msg(sb, KERN_ERR,
4623 "bad s_min_extra_isize: %d", v);
4624 goto failed_mount;
4625 }
4626 if (sbi->s_want_extra_isize < v)
4627 sbi->s_want_extra_isize = v;
4628 }
4629 }
4630
5aee0f8a
TT
4631 if (sbi->s_es->s_mount_opts[0]) {
4632 char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
4633 sizeof(sbi->s_es->s_mount_opts),
4634 GFP_KERNEL);
4635 if (!s_mount_opts)
4636 goto failed_mount;
b237e304 4637 if (!parse_options(s_mount_opts, sb, &parsed_opts, 0)) {
5aee0f8a
TT
4638 ext4_msg(sb, KERN_WARNING,
4639 "failed to parse options in superblock: %s",
4640 s_mount_opts);
4641 }
4642 kfree(s_mount_opts);
8b67f04a 4643 }
5a916be1 4644 sbi->s_def_mount_opt = sbi->s_mount_opt;
b237e304 4645 if (!parse_options((char *) data, sb, &parsed_opts, 0))
ac27a0ec
DK
4646 goto failed_mount;
4647
c83ad55e 4648#ifdef CONFIG_UNICODE
f8f4acb6 4649 if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
c83ad55e
GKB
4650 const struct ext4_sb_encodings *encoding_info;
4651 struct unicode_map *encoding;
4652 __u16 encoding_flags;
4653
c83ad55e
GKB
4654 if (ext4_sb_read_encoding(es, &encoding_info,
4655 &encoding_flags)) {
4656 ext4_msg(sb, KERN_ERR,
4657 "Encoding requested by superblock is unknown");
4658 goto failed_mount;
4659 }
4660
4661 encoding = utf8_load(encoding_info->version);
4662 if (IS_ERR(encoding)) {
4663 ext4_msg(sb, KERN_ERR,
4664 "can't mount with superblock charset: %s-%s "
4665 "not supported by the kernel. flags: 0x%x.",
4666 encoding_info->name, encoding_info->version,
4667 encoding_flags);
4668 goto failed_mount;
4669 }
4670 ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4671 "%s-%s with flags 0x%hx", encoding_info->name,
4672 encoding_info->version?:"\b", encoding_flags);
4673
f8f4acb6
DR
4674 sb->s_encoding = encoding;
4675 sb->s_encoding_flags = encoding_flags;
c83ad55e
GKB
4676 }
4677#endif
4678
56889787 4679 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
556e0319 4680 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n");
781c036b 4681 /* can't mount with both data=journal and dioread_nolock. */
244adf64 4682 clear_opt(sb, DIOREAD_NOLOCK);
556e0319 4683 clear_opt2(sb, JOURNAL_FAST_COMMIT);
56889787
TT
4684 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4685 ext4_msg(sb, KERN_ERR, "can't mount with "
4686 "both data=journal and delalloc");
4687 goto failed_mount;
4688 }
fc626fe3 4689 if (test_opt(sb, DAX_ALWAYS)) {
923ae0ff
RZ
4690 ext4_msg(sb, KERN_ERR, "can't mount with "
4691 "both data=journal and dax");
4692 goto failed_mount;
4693 }
73b92a2a
SK
4694 if (ext4_has_feature_encrypt(sb)) {
4695 ext4_msg(sb, KERN_WARNING,
4696 "encrypted files will use data=ordered "
4697 "instead of data journaling mode");
4698 }
56889787
TT
4699 if (test_opt(sb, DELALLOC))
4700 clear_opt(sb, DELALLOC);
001e4a87
TH
4701 } else {
4702 sb->s_iflags |= SB_I_CGROUPWB;
56889787
TT
4703 }
4704
1751e8a6
LT
4705 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
4706 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
ac27a0ec 4707
617ba13b 4708 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
e2b911c5
DW
4709 (ext4_has_compat_features(sb) ||
4710 ext4_has_ro_compat_features(sb) ||
4711 ext4_has_incompat_features(sb)))
b31e1552
ES
4712 ext4_msg(sb, KERN_WARNING,
4713 "feature flags set on rev 0 fs, "
4714 "running e2fsck is recommended");
469108ff 4715
ed3654eb
TT
4716 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4717 set_opt2(sb, HURD_COMPAT);
e2b911c5 4718 if (ext4_has_feature_64bit(sb)) {
ed3654eb
TT
4719 ext4_msg(sb, KERN_ERR,
4720 "The Hurd can't support 64-bit file systems");
4721 goto failed_mount;
4722 }
dec214d0
TE
4723
4724 /*
4725 * ea_inode feature uses l_i_version field which is not
4726 * available in HURD_COMPAT mode.
4727 */
4728 if (ext4_has_feature_ea_inode(sb)) {
4729 ext4_msg(sb, KERN_ERR,
4730 "ea_inode feature is not supported for Hurd");
4731 goto failed_mount;
4732 }
ed3654eb
TT
4733 }
4734
2035e776
TT
4735 if (IS_EXT2_SB(sb)) {
4736 if (ext2_feature_set_ok(sb))
4737 ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4738 "using the ext4 subsystem");
4739 else {
0d9366d6
ES
4740 /*
4741 * If we're probing, be silent if this looks like
4742 * it's actually an ext[34] filesystem.
4743 */
4744 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4745 goto failed_mount;
2035e776
TT
4746 ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4747 "to feature incompatibilities");
4748 goto failed_mount;
4749 }
4750 }
4751
4752 if (IS_EXT3_SB(sb)) {
4753 if (ext3_feature_set_ok(sb))
4754 ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4755 "using the ext4 subsystem");
4756 else {
0d9366d6
ES
4757 /*
4758 * If we're probing, be silent if this looks like
4759 * it's actually an ext4 filesystem.
4760 */
4761 if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4762 goto failed_mount;
2035e776
TT
4763 ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4764 "to feature incompatibilities");
4765 goto failed_mount;
4766 }
4767 }
4768
ac27a0ec
DK
4769 /*
4770 * Check feature flags regardless of the revision level, since we
4771 * previously didn't change the revision level when setting the flags,
4772 * so there is a chance incompat flags are set on a rev 0 filesystem.
4773 */
bc98a42c 4774 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
ac27a0ec 4775 goto failed_mount;
a13fb1a4 4776
5b9554dc
TT
4777 if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
4778 ext4_msg(sb, KERN_ERR,
4779 "Number of reserved GDT blocks insanely large: %d",
4780 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4781 goto failed_mount;
4782 }
4783
bdd3c50d
CH
4784 if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
4785 bdev_nr_sectors(sb->s_bdev)))
a8ab6d38
IW
4786 set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4787
fc626fe3 4788 if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
559db4c6
RZ
4789 if (ext4_has_feature_inline_data(sb)) {
4790 ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4791 " that may contain inline data");
361d24d4 4792 goto failed_mount;
559db4c6 4793 }
a8ab6d38 4794 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
24f3478d 4795 ext4_msg(sb, KERN_ERR,
361d24d4
ES
4796 "DAX unsupported by block device.");
4797 goto failed_mount;
24f3478d 4798 }
923ae0ff
RZ
4799 }
4800
e2b911c5 4801 if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
6ddb2447
TT
4802 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4803 es->s_encryption_level);
4804 goto failed_mount;
4805 }
4806
ac27a0ec 4807 if (sb->s_blocksize != blocksize) {
afd09b61
AM
4808 /*
4809 * bh must be released before kill_bdev(), otherwise
4810 * neither the bh nor its page will be freed. kill_bdev()
4811 * is called by sb_set_blocksize().
4812 */
4813 brelse(bh);
ce40733c
AK
4814 /* Validate the filesystem blocksize */
4815 if (!sb_set_blocksize(sb, blocksize)) {
b31e1552 4816 ext4_msg(sb, KERN_ERR, "bad block size %d",
ce40733c 4817 blocksize);
afd09b61 4818 bh = NULL;
ac27a0ec
DK
4819 goto failed_mount;
4820 }
4821
70bbb3e0
AM
4822 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
4823 offset = do_div(logical_sb_block, blocksize);
8394a6ab 4824 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4825 if (IS_ERR(bh)) {
b31e1552
ES
4826 ext4_msg(sb, KERN_ERR,
4827 "Can't read superblock on 2nd try");
8394a6ab 4828 ret = PTR_ERR(bh);
4829 bh = NULL;
ac27a0ec
DK
4830 goto failed_mount;
4831 }
2716b802 4832 es = (struct ext4_super_block *)(bh->b_data + offset);
ac27a0ec 4833 sbi->s_es = es;
617ba13b 4834 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
b31e1552
ES
4835 ext4_msg(sb, KERN_ERR,
4836 "Magic mismatch, very weird!");
ac27a0ec
DK
4837 goto failed_mount;
4838 }
4839 }
4840
e2b911c5 4841 has_huge_files = ext4_has_feature_huge_file(sb);
f287a1a5
TT
4842 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
4843 has_huge_files);
4844 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
ac27a0ec 4845
0d1ee42f 4846 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
e2b911c5 4847 if (ext4_has_feature_64bit(sb)) {
8fadc143 4848 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
0d1ee42f 4849 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
d8ea6cf8 4850 !is_power_of_2(sbi->s_desc_size)) {
b31e1552
ES
4851 ext4_msg(sb, KERN_ERR,
4852 "unsupported descriptor size %lu",
0d1ee42f
AR
4853 sbi->s_desc_size);
4854 goto failed_mount;
4855 }
4856 } else
4857 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
0b8e58a1 4858
ac27a0ec 4859 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
ac27a0ec 4860 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
0b8e58a1 4861
617ba13b 4862 sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
ac27a0ec 4863 if (sbi->s_inodes_per_block == 0)
617ba13b 4864 goto cantfind_ext4;
cd6bb35b
TT
4865 if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
4866 sbi->s_inodes_per_group > blocksize * 8) {
4867 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu",
b9c538da 4868 sbi->s_inodes_per_group);
cd6bb35b
TT
4869 goto failed_mount;
4870 }
ac27a0ec
DK
4871 sbi->s_itb_per_group = sbi->s_inodes_per_group /
4872 sbi->s_inodes_per_block;
0d1ee42f 4873 sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
ac27a0ec
DK
4874 sbi->s_sbh = bh;
4875 sbi->s_mount_state = le16_to_cpu(es->s_state);
e57aa839
FW
4876 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
4877 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
0b8e58a1 4878
2b2d6d01 4879 for (i = 0; i < 4; i++)
ac27a0ec
DK
4880 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
4881 sbi->s_def_hash_version = es->s_def_hash_version;
e2b911c5 4882 if (ext4_has_feature_dir_index(sb)) {
23301410
TT
4883 i = le32_to_cpu(es->s_flags);
4884 if (i & EXT2_FLAGS_UNSIGNED_HASH)
4885 sbi->s_hash_unsigned = 3;
4886 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
f99b2589 4887#ifdef __CHAR_UNSIGNED__
bc98a42c 4888 if (!sb_rdonly(sb))
23301410
TT
4889 es->s_flags |=
4890 cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
4891 sbi->s_hash_unsigned = 3;
f99b2589 4892#else
bc98a42c 4893 if (!sb_rdonly(sb))
23301410
TT
4894 es->s_flags |=
4895 cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
f99b2589 4896#endif
23301410 4897 }
f99b2589 4898 }
ac27a0ec 4899
281b5995
TT
4900 /* Handle clustersize */
4901 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
ef5fd681 4902 if (ext4_has_feature_bigalloc(sb)) {
281b5995
TT
4903 if (clustersize < blocksize) {
4904 ext4_msg(sb, KERN_ERR,
4905 "cluster size (%d) smaller than "
4906 "block size (%d)", clustersize, blocksize);
4907 goto failed_mount;
4908 }
4909 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4910 le32_to_cpu(es->s_log_block_size);
4911 sbi->s_clusters_per_group =
4912 le32_to_cpu(es->s_clusters_per_group);
4913 if (sbi->s_clusters_per_group > blocksize * 8) {
4914 ext4_msg(sb, KERN_ERR,
4915 "#clusters per group too big: %lu",
4916 sbi->s_clusters_per_group);
4917 goto failed_mount;
4918 }
4919 if (sbi->s_blocks_per_group !=
4920 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
4921 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4922 "clusters per group (%lu) inconsistent",
4923 sbi->s_blocks_per_group,
4924 sbi->s_clusters_per_group);
4925 goto failed_mount;
4926 }
4927 } else {
4928 if (clustersize != blocksize) {
bfe0a5f4
TT
4929 ext4_msg(sb, KERN_ERR,
4930 "fragment/cluster size (%d) != "
4931 "block size (%d)", clustersize, blocksize);
4932 goto failed_mount;
281b5995
TT
4933 }
4934 if (sbi->s_blocks_per_group > blocksize * 8) {
4935 ext4_msg(sb, KERN_ERR,
4936 "#blocks per group too big: %lu",
4937 sbi->s_blocks_per_group);
4938 goto failed_mount;
4939 }
4940 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4941 sbi->s_cluster_bits = 0;
ac27a0ec 4942 }
281b5995
TT
4943 sbi->s_cluster_ratio = clustersize / blocksize;
4944
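/*
 * Worked example (illustrative sketch, not part of the original file): with
 * bigalloc enabled, a 4 KiB block size (s_log_block_size = 2, since
 * 1024 << 2 = 4096) and a 64 KiB cluster size (s_log_cluster_size = 6,
 * since 1024 << 6 = 65536) yield:
 *
 *   s_cluster_bits  = 6 - 2 = 4
 *   s_cluster_ratio = 65536 / 4096 = 16 blocks per cluster
 */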
960fd856
TT
4945 /* Do we have standard group size of clustersize * 8 blocks ? */
4946 if (sbi->s_blocks_per_group == clustersize << 3)
4947 set_opt2(sb, STD_GROUP_SIZE);
4948
bf43d84b
ES
4949 /*
4950 * Test whether we have more sectors than will fit in sector_t,
4951 * and whether the max offset is addressable by the page cache.
4952 */
5a9ae68a 4953 err = generic_check_addressable(sb->s_blocksize_bits,
30ca22c7 4954 ext4_blocks_count(es));
5a9ae68a 4955 if (err) {
b31e1552 4956 ext4_msg(sb, KERN_ERR, "filesystem"
bf43d84b 4957 " too large to mount safely on this system");
ac27a0ec
DK
4958 goto failed_mount;
4959 }
4960
617ba13b
MC
4961 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
4962 goto cantfind_ext4;
e7c95593 4963
0f2ddca6 4964 /* check blocks count against device size */
5513b241 4965 blocks_count = sb_bdev_nr_blocks(sb);
0f2ddca6 4966 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
b31e1552
ES
4967 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4968 "exceeds size of device (%llu blocks)",
0f2ddca6
FTN
4969 ext4_blocks_count(es), blocks_count);
4970 goto failed_mount;
4971 }
4972
0b8e58a1
AD
4973 /*
4974 * It makes no sense for the first data block to be beyond the end
4975 * of the filesystem.
4976 */
4977 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
5635a62b 4978 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
b31e1552
ES
4979 "block %u is beyond end of filesystem (%llu)",
4980 le32_to_cpu(es->s_first_data_block),
4981 ext4_blocks_count(es));
e7c95593
ES
4982 goto failed_mount;
4983 }
bfe0a5f4
TT
4984 if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4985 (sbi->s_cluster_ratio == 1)) {
4986 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4987 "block is 0 with a 1k block and cluster size");
4988 goto failed_mount;
4989 }
4990
bd81d8ee
LV
4991 blocks_count = (ext4_blocks_count(es) -
4992 le32_to_cpu(es->s_first_data_block) +
4993 EXT4_BLOCKS_PER_GROUP(sb) - 1);
4994 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4ec11028 4995 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
df41460a 4996 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4ec11028 4997 "(block count %llu, first data block %u, "
df41460a 4998 "blocks per group %lu)", blocks_count,
4ec11028
TT
4999 ext4_blocks_count(es),
5000 le32_to_cpu(es->s_first_data_block),
5001 EXT4_BLOCKS_PER_GROUP(sb));
5002 goto failed_mount;
5003 }
bd81d8ee 5004 sbi->s_groups_count = blocks_count;
fb0a387d
ES
5005 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
5006 (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
9e463084
TT
5007 if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
5008 le32_to_cpu(es->s_inodes_count)) {
5009 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
5010 le32_to_cpu(es->s_inodes_count),
5011 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
5012 ret = -EINVAL;
5013 goto failed_mount;
5014 }
617ba13b
MC
5015 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
5016 EXT4_DESC_PER_BLOCK(sb);
3a4b77cd 5017 if (ext4_has_feature_meta_bg(sb)) {
2ba3e6e8 5018 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
3a4b77cd
EG
5019 ext4_msg(sb, KERN_WARNING,
5020 "first meta block group too large: %u "
5021 "(group descriptor block count %u)",
5022 le32_to_cpu(es->s_first_meta_bg), db_count);
5023 goto failed_mount;
5024 }
5025 }
1d0c3924
TT
5026 rcu_assign_pointer(sbi->s_group_desc,
5027 kvmalloc_array(db_count,
5028 sizeof(struct buffer_head *),
5029 GFP_KERNEL));
ac27a0ec 5030 if (sbi->s_group_desc == NULL) {
b31e1552 5031 ext4_msg(sb, KERN_ERR, "not enough memory");
2cde417d 5032 ret = -ENOMEM;
ac27a0ec
DK
5033 goto failed_mount;
5034 }
5035
705895b6 5036 bgl_lock_init(sbi->s_blockgroup_lock);
ac27a0ec 5037
85c8f176
AP
5038 /* Pre-read the descriptors into the buffer cache */
5039 for (i = 0; i < db_count; i++) {
5040 block = descriptor_loc(sb, logical_sb_block, i);
5df1d412 5041 ext4_sb_breadahead_unmovable(sb, block);
85c8f176
AP
5042 }
5043
ac27a0ec 5044 for (i = 0; i < db_count; i++) {
1d0c3924
TT
5045 struct buffer_head *bh;
5046
70bbb3e0 5047 block = descriptor_loc(sb, logical_sb_block, i);
8394a6ab 5048 bh = ext4_sb_bread_unmovable(sb, block);
5049 if (IS_ERR(bh)) {
b31e1552
ES
5050 ext4_msg(sb, KERN_ERR,
5051 "can't read group descriptor %d", i);
ac27a0ec 5052 db_count = i;
8394a6ab 5053 ret = PTR_ERR(bh);
ac27a0ec
DK
5054 goto failed_mount2;
5055 }
1d0c3924
TT
5056 rcu_read_lock();
5057 rcu_dereference(sbi->s_group_desc)[i] = bh;
5058 rcu_read_unlock();
ac27a0ec 5059 }
44de022c 5060 sbi->s_gdb_count = db_count;
829fa70d 5061 if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
b31e1552 5062 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
6a797d27 5063 ret = -EFSCORRUPTED;
f9ae9cf5 5064 goto failed_mount2;
ac27a0ec 5065 }
772cb7c8 5066
235699a8 5067 timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
c92dc856
JK
5068 spin_lock_init(&sbi->s_error_lock);
5069 INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
04496411 5070
a75ae78f 5071 /* Register extent status tree shrinker */
eb68d0e2 5072 if (ext4_es_register_shrinker(sbi))
ce7e010a 5073 goto failed_mount3;
ce7e010a 5074
c9de560d 5075 sbi->s_stripe = ext4_get_stripe_size(sbi);
67a5da56 5076 sbi->s_extent_max_zeroout_kb = 32;
c9de560d 5077
f9ae9cf5
TT
5078 /*
5079 * set up enough so that it can read an inode
5080 */
f6e63f90 5081 sb->s_op = &ext4_sops;
617ba13b
MC
5082 sb->s_export_op = &ext4_export_ops;
5083 sb->s_xattr = ext4_xattr_handlers;
643fa961 5084#ifdef CONFIG_FS_ENCRYPTION
a7550b30 5085 sb->s_cop = &ext4_cryptops;
ffcc4182 5086#endif
c93d8f88
EB
5087#ifdef CONFIG_FS_VERITY
5088 sb->s_vop = &ext4_verityops;
5089#endif
ac27a0ec 5090#ifdef CONFIG_QUOTA
617ba13b 5091 sb->dq_op = &ext4_quota_operations;
e2b911c5 5092 if (ext4_has_feature_quota(sb))
1fa5efe3 5093 sb->s_qcop = &dquot_quotactl_sysfile_ops;
262b4662
JK
5094 else
5095 sb->s_qcop = &ext4_qctl_operations;
689c958c 5096 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
ac27a0ec 5097#endif
85787090 5098 memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
f2fa2ffc 5099
ac27a0ec 5100 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3b9d4ed2 5101 mutex_init(&sbi->s_orphan_lock);
ac27a0ec 5102
aa75f4d3
HS
5103 /* Initialize fast commit stuff */
5104 atomic_set(&sbi->s_fc_subtid, 0);
5105 atomic_set(&sbi->s_fc_ineligible_updates, 0);
5106 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
5107 INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
5108 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
5109 INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
5110 sbi->s_fc_bytes = 0;
9b5f6c9b
HS
5111 ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
5112 ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
aa75f4d3
HS
5113 spin_lock_init(&sbi->s_fc_lock);
5114 memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
8016e29f
HS
5115 sbi->s_fc_replay_state.fc_regions = NULL;
5116 sbi->s_fc_replay_state.fc_regions_size = 0;
5117 sbi->s_fc_replay_state.fc_regions_used = 0;
5118 sbi->s_fc_replay_state.fc_regions_valid = 0;
5119 sbi->s_fc_replay_state.fc_modified_inodes = NULL;
5120 sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
5121 sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
aa75f4d3 5122
ac27a0ec
DK
5123 sb->s_root = NULL;
5124
5125 needs_recovery = (es->s_last_orphan != 0 ||
02f310fc 5126 ext4_has_feature_orphan_present(sb) ||
e2b911c5 5127 ext4_has_feature_journal_needs_recovery(sb));
ac27a0ec 5128
bc98a42c 5129 if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
c5e06d10 5130 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
50460fe8 5131 goto failed_mount3a;
c5e06d10 5132
ac27a0ec
DK
5133 /*
5134 * The first inode we look at is the journal inode. Don't try
5135 * root first: it may be modified in the journal!
5136 */
e2b911c5 5137 if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
b237e304 5138 err = ext4_load_journal(sb, es, parsed_opts.journal_devnum);
4753d8a2 5139 if (err)
50460fe8 5140 goto failed_mount3a;
bc98a42c 5141 } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
e2b911c5 5142 ext4_has_feature_journal_needs_recovery(sb)) {
b31e1552
ES
5143 ext4_msg(sb, KERN_ERR, "required journal recovery "
5144 "suppressed and not mounted read-only");
744692dc 5145 goto failed_mount_wq;
ac27a0ec 5146 } else {
1e381f60
DM
5147 /* Nojournal mode, all journal mount options are illegal */
5148 if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5149 ext4_msg(sb, KERN_ERR, "can't mount with "
5150 "journal_checksum, fs mounted w/o journal");
5151 goto failed_mount_wq;
5152 }
5153 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5154 ext4_msg(sb, KERN_ERR, "can't mount with "
5155 "journal_async_commit, fs mounted w/o journal");
5156 goto failed_mount_wq;
5157 }
5158 if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5159 ext4_msg(sb, KERN_ERR, "can't mount with "
5160 "commit=%lu, fs mounted w/o journal",
5161 sbi->s_commit_interval / HZ);
5162 goto failed_mount_wq;
5163 }
5164 if (EXT4_MOUNT_DATA_FLAGS &
5165 (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5166 ext4_msg(sb, KERN_ERR, "can't mount with "
5167 "data=, fs mounted w/o journal");
5168 goto failed_mount_wq;
5169 }
50b29d8f 5170 sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
1e381f60 5171 clear_opt(sb, JOURNAL_CHECKSUM);
fd8c37ec 5172 clear_opt(sb, DATA_FLAGS);
995a3ed6 5173 clear_opt2(sb, JOURNAL_FAST_COMMIT);
0390131b
FM
5174 sbi->s_journal = NULL;
5175 needs_recovery = 0;
5176 goto no_journal;
ac27a0ec
DK
5177 }
5178
e2b911c5 5179 if (ext4_has_feature_64bit(sb) &&
eb40a09c
JS
5180 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
5181 JBD2_FEATURE_INCOMPAT_64BIT)) {
b31e1552 5182 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
744692dc 5183 goto failed_mount_wq;
eb40a09c
JS
5184 }
5185
25ed6e8a
DW
5186 if (!set_journal_csum_feature_set(sb)) {
5187 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
5188 "feature set");
5189 goto failed_mount_wq;
d4da6c9c 5190 }
818d276c 5191
a1e5e465
HS
5192 if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
5193 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
5194 JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
5195 ext4_msg(sb, KERN_ERR,
5196 "Failed to set fast commit journal feature");
5197 goto failed_mount_wq;
5198 }
5199
ac27a0ec
DK
5200 /* We have now updated the journal if required, so we can
5201 * validate the data journaling mode. */
5202 switch (test_opt(sb, DATA_FLAGS)) {
5203 case 0:
5204 /* No mode set, assume a default based on the journal
63f57933
AM
5205 * capabilities: ORDERED_DATA if the journal can
5206 * cope, else JOURNAL_DATA
5207 */
dab291af 5208 if (jbd2_journal_check_available_features
27f394a7 5209 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
fd8c37ec 5210 set_opt(sb, ORDERED_DATA);
27f394a7
TN
5211 sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
5212 } else {
fd8c37ec 5213 set_opt(sb, JOURNAL_DATA);
27f394a7
TN
5214 sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
5215 }
ac27a0ec
DK
5216 break;
5217
617ba13b
MC
5218 case EXT4_MOUNT_ORDERED_DATA:
5219 case EXT4_MOUNT_WRITEBACK_DATA:
dab291af
MC
5220 if (!jbd2_journal_check_available_features
5221 (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
b31e1552
ES
5222 ext4_msg(sb, KERN_ERR, "Journal does not support "
5223 "requested data journaling mode");
744692dc 5224 goto failed_mount_wq;
ac27a0ec 5225 }
5a150bde 5226 break;
ac27a0ec
DK
5227 default:
5228 break;
5229 }
ab04df78
JK
5230
5231 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
5232 test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5233 ext4_msg(sb, KERN_ERR, "can't mount with "
5234 "journal_async_commit in data=ordered mode");
5235 goto failed_mount_wq;
5236 }
5237
b237e304 5238 set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
ac27a0ec 5239
342af94e 5240 sbi->s_journal->j_submit_inode_data_buffers =
afb585a9 5241 ext4_journal_submit_inode_data_buffers;
342af94e 5242 sbi->s_journal->j_finish_inode_data_buffers =
afb585a9 5243 ext4_journal_finish_inode_data_buffers;
18aadd47 5244
ce7e010a 5245no_journal:
cdb7ee4c
TE
5246 if (!test_opt(sb, NO_MBCACHE)) {
5247 sbi->s_ea_block_cache = ext4_xattr_create_cache();
5248 if (!sbi->s_ea_block_cache) {
dec214d0 5249 ext4_msg(sb, KERN_ERR,
cdb7ee4c 5250 "Failed to create ea_block_cache");
dec214d0
TE
5251 goto failed_mount_wq;
5252 }
cdb7ee4c
TE
5253
5254 if (ext4_has_feature_ea_inode(sb)) {
5255 sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5256 if (!sbi->s_ea_inode_cache) {
5257 ext4_msg(sb, KERN_ERR,
5258 "Failed to create ea_inode_cache");
5259 goto failed_mount_wq;
5260 }
5261 }
9c191f70
M
5262 }
5263
c93d8f88
EB
5264 if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
5265 ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
5266 goto failed_mount_wq;
5267 }
5268
bc98a42c 5269 if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
e2b911c5
DW
5270 !ext4_has_feature_encrypt(sb)) {
5271 ext4_set_feature_encrypt(sb);
4392fbc4 5272 ext4_commit_super(sb);
6ddb2447
TT
5273 }
5274
952fc18e
TT
5275 /*
5276 * Get the # of file system overhead blocks from the
5277 * superblock if present.
5278 */
5279 if (es->s_overhead_clusters)
5280 sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5281 else {
07aa2ea1
LC
5282 err = ext4_calculate_overhead(sb);
5283 if (err)
952fc18e
TT
5284 goto failed_mount_wq;
5285 }
5286
fd89d5f2
TH
5287 /*
5288 * The maximum number of concurrent works can be high and
5289 * concurrency isn't really necessary. Limit it to 1.
5290 */
2e8fa54e
JK
5291 EXT4_SB(sb)->rsv_conversion_wq =
5292 alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5293 if (!EXT4_SB(sb)->rsv_conversion_wq) {
5294 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
07aa2ea1 5295 ret = -ENOMEM;
2e8fa54e
JK
5296 goto failed_mount4;
5297 }
5298
ac27a0ec 5299 /*
dab291af 5300 * The jbd2_journal_load will have done any necessary log recovery,
ac27a0ec
DK
5301 * so we can safely mount the rest of the filesystem now.
5302 */
5303
8a363970 5304 root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
1d1fe1ee 5305 if (IS_ERR(root)) {
b31e1552 5306 ext4_msg(sb, KERN_ERR, "get root inode failed");
1d1fe1ee 5307 ret = PTR_ERR(root);
32a9bb57 5308 root = NULL;
ac27a0ec
DK
5309 goto failed_mount4;
5310 }
5311 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
b31e1552 5312 ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
94bf608a 5313 iput(root);
ac27a0ec
DK
5314 goto failed_mount4;
5315 }
b886ee3e 5316
48fde701 5317 sb->s_root = d_make_root(root);
1d1fe1ee 5318 if (!sb->s_root) {
b31e1552 5319 ext4_msg(sb, KERN_ERR, "get root dentry failed");
1d1fe1ee
DH
5320 ret = -ENOMEM;
5321 goto failed_mount4;
5322 }
ac27a0ec 5323
c89128a0
JK
5324 ret = ext4_setup_super(sb, es, sb_rdonly(sb));
5325 if (ret == -EROFS) {
1751e8a6 5326 sb->s_flags |= SB_RDONLY;
c89128a0
JK
5327 ret = 0;
5328 } else if (ret)
5329 goto failed_mount4a;
ef7f3835 5330
b5799018 5331 ext4_set_resv_clusters(sb);
27dd4385 5332
0f5bde1d
JK
5333 if (test_opt(sb, BLOCK_VALIDITY)) {
5334 err = ext4_setup_system_zone(sb);
5335 if (err) {
5336 ext4_msg(sb, KERN_ERR, "failed to initialize system "
5337 "zone (%d)", err);
5338 goto failed_mount4a;
5339 }
f9ae9cf5 5340 }
8016e29f 5341 ext4_fc_replay_cleanup(sb);
f9ae9cf5
TT
5342
5343 ext4_ext_init(sb);
196e402a
HS
5344
5345 /*
5346 * Enable optimize_scan if the number of groups is >= threshold. This can be
5347 * turned off by passing "mb_optimize_scan=0". This can also be
5348 * turned on forcefully by passing "mb_optimize_scan=1".
5349 */
5350 if (parsed_opts.mb_optimize_scan == 1)
5351 set_opt2(sb, MB_OPTIMIZE_SCAN);
5352 else if (parsed_opts.mb_optimize_scan == 0)
5353 clear_opt2(sb, MB_OPTIMIZE_SCAN);
5354 else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5355 set_opt2(sb, MB_OPTIMIZE_SCAN);
5356
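/*
 * Illustrative summary of the tri-state handling above (sketch, not part
 * of the original file):
 *
 *   mb_optimize_scan=1 passed at mount  -> MB_OPTIMIZE_SCAN forced on
 *   mb_optimize_scan=0 passed at mount  -> MB_OPTIMIZE_SCAN forced off
 *   option not passed                   -> on only when s_groups_count >=
 *                                          MB_DEFAULT_LINEAR_SCAN_THRESHOLD
 */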
f9ae9cf5
TT
5357 err = ext4_mb_init(sb);
5358 if (err) {
5359 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5360 err);
dcf2d804 5361 goto failed_mount5;
c2774d84
AK
5362 }
5363
027f14f5
TT
5364 /*
5365 * We can only set up the journal commit callback once
5366 * mballoc is initialized
5367 */
5368 if (sbi->s_journal)
5369 sbi->s_journal->j_commit_callback =
5370 ext4_journal_commit_callback;
5371
d5e03cbb 5372 block = ext4_count_free_clusters(sb);
666245d9 5373 ext4_free_blocks_count_set(sbi->s_es,
d5e03cbb 5374 EXT4_C2B(sbi, block));
908c7f19
TH
5375 err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
5376 GFP_KERNEL);
d5e03cbb
TT
5377 if (!err) {
5378 unsigned long freei = ext4_count_free_inodes(sb);
5379 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
908c7f19
TH
5380 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
5381 GFP_KERNEL);
d5e03cbb 5382 }
b2bbb92f
JK
5383 /*
5384 * Update the checksum after updating free space/inode
5385 * counters. Otherwise the superblock can have an incorrect
5386 * checksum in the buffer cache until it is written out, and
5387 * e2fsprogs programs trying to open a file system immediately
5388 * after it is mounted can fail.
5389 */
5390 ext4_superblock_csum_set(sb);
d5e03cbb
TT
5391 if (!err)
5392 err = percpu_counter_init(&sbi->s_dirs_counter,
908c7f19 5393 ext4_count_dirs(sb), GFP_KERNEL);
d5e03cbb 5394 if (!err)
908c7f19
TH
5395 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
5396 GFP_KERNEL);
efc61345
EW
5397 if (!err)
5398 err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
5399 GFP_KERNEL);
c8585c6f 5400 if (!err)
bbd55937 5401 err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
c8585c6f 5402
d5e03cbb
TT
5403 if (err) {
5404 ext4_msg(sb, KERN_ERR, "insufficient memory");
5405 goto failed_mount6;
5406 }
5407
e2b911c5 5408 if (ext4_has_feature_flex_bg(sb))
d5e03cbb
TT
5409 if (!ext4_fill_flex_info(sb)) {
5410 ext4_msg(sb, KERN_ERR,
5411 "unable to initialize "
5412 "flex_bg meta info!");
8f6840c4 5413 ret = -ENOMEM;
d5e03cbb
TT
5414 goto failed_mount6;
5415 }
5416
bfff6873
LC
5417 err = ext4_register_li_request(sb, first_not_zeroed);
5418 if (err)
dcf2d804 5419 goto failed_mount6;
bfff6873 5420
b5799018 5421 err = ext4_register_sysfs(sb);
dcf2d804
TM
5422 if (err)
5423 goto failed_mount7;
3197ebdb 5424
02f310fc
JK
5425 err = ext4_init_orphan_info(sb);
5426 if (err)
5427 goto failed_mount8;
9b2ff357
JK
5428#ifdef CONFIG_QUOTA
5429 /* Enable quota usage during mount. */
bc98a42c 5430 if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
9b2ff357
JK
5431 err = ext4_enable_quotas(sb);
5432 if (err)
02f310fc 5433 goto failed_mount9;
9b2ff357
JK
5434 }
5435#endif /* CONFIG_QUOTA */
5436
bc71726c 5437 /*
5438 * Save the original bdev mapping's wb_err value, which can later be
5439 * used to detect metadata async write errors.
5440 */
5441 spin_lock_init(&sbi->s_bdev_wb_lock);
9704a322
ZX
5442 errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
5443 &sbi->s_bdev_wb_err);
bc71726c 5444 sb->s_bdev->bd_super = sb;
617ba13b
MC
5445 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5446 ext4_orphan_cleanup(sb, es);
5447 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
0390131b 5448 if (needs_recovery) {
b31e1552 5449 ext4_msg(sb, KERN_INFO, "recovery complete");
11215630
JK
5450 err = ext4_mark_recovery_complete(sb, es);
5451 if (err)
02f310fc 5452 goto failed_mount9;
0390131b
FM
5453 }
5454 if (EXT4_SB(sb)->s_journal) {
5455 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5456 descr = " journalled data mode";
5457 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5458 descr = " ordered data mode";
5459 else
5460 descr = " writeback data mode";
5461 } else
5462 descr = "out journal";
5463
79add3a3
LC
5464 if (test_opt(sb, DISCARD)) {
5465 struct request_queue *q = bdev_get_queue(sb->s_bdev);
5466 if (!blk_queue_discard(q))
5467 ext4_msg(sb, KERN_WARNING,
5468 "mounting with \"discard\" option, but "
5469 "the device does not support discard");
5470 }
5471
e294a537
TT
5472 if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5473 ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
ca9b404f 5474 "Opts: %.*s%s%s. Quota mode: %s.", descr,
5aee0f8a
TT
5475 (int) sizeof(sbi->s_es->s_mount_opts),
5476 sbi->s_es->s_mount_opts,
ca9b404f
RA
5477 *sbi->s_es->s_mount_opts ? "; " : "", orig_data,
5478 ext4_quota_mode(sb));
ac27a0ec 5479
66e61a9e
TT
5480 if (es->s_error_count)
5481 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
ac27a0ec 5482
efbed4dc
TT
5483 /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5484 ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5485 ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5486 ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
1cf006ed
DM
5487 atomic_set(&sbi->s_warning_count, 0);
5488 atomic_set(&sbi->s_msg_count, 0);
efbed4dc 5489
d4c402d9 5490 kfree(orig_data);
ac27a0ec
DK
5491 return 0;
5492
617ba13b 5493cantfind_ext4:
ac27a0ec 5494 if (!silent)
b31e1552 5495 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
ac27a0ec
DK
5496 goto failed_mount;
5497
02f310fc
JK
5498failed_mount9:
5499 ext4_release_orphan_info(sb);
72ba7450 5500failed_mount8:
ebd173be 5501 ext4_unregister_sysfs(sb);
cb8d53d2 5502 kobject_put(&sbi->s_kobj);
dcf2d804
TM
5503failed_mount7:
5504 ext4_unregister_li_request(sb);
5505failed_mount6:
f9ae9cf5 5506 ext4_mb_release(sb);
7c990728
SJS
5507 rcu_read_lock();
5508 flex_groups = rcu_dereference(sbi->s_flex_groups);
5509 if (flex_groups) {
5510 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
5511 kvfree(flex_groups[i]);
5512 kvfree(flex_groups);
5513 }
5514 rcu_read_unlock();
d5e03cbb
TT
5515 percpu_counter_destroy(&sbi->s_freeclusters_counter);
5516 percpu_counter_destroy(&sbi->s_freeinodes_counter);
5517 percpu_counter_destroy(&sbi->s_dirs_counter);
5518 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
efc61345 5519 percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
bbd55937 5520 percpu_free_rwsem(&sbi->s_writepages_rwsem);
00764937 5521failed_mount5:
f9ae9cf5
TT
5522 ext4_ext_release(sb);
5523 ext4_release_system_zone(sb);
5524failed_mount4a:
94bf608a 5525 dput(sb->s_root);
32a9bb57 5526 sb->s_root = NULL;
94bf608a 5527failed_mount4:
b31e1552 5528 ext4_msg(sb, KERN_ERR, "mount failed");
2e8fa54e
JK
5529 if (EXT4_SB(sb)->rsv_conversion_wq)
5530 destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4c0425ff 5531failed_mount_wq:
50c15df6
CX
5532 ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5533 sbi->s_ea_inode_cache = NULL;
5534
5535 ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5536 sbi->s_ea_block_cache = NULL;
5537
0390131b 5538 if (sbi->s_journal) {
bb9464e0 5539 /* flush s_error_work before journal destroy. */
5540 flush_work(&sbi->s_error_work);
0390131b
FM
5541 jbd2_journal_destroy(sbi->s_journal);
5542 sbi->s_journal = NULL;
5543 }
50460fe8 5544failed_mount3a:
d3922a77 5545 ext4_es_unregister_shrinker(sbi);
eb68d0e2 5546failed_mount3:
bb9464e0 5547 /* flush s_error_work before sbi destroy */
c92dc856 5548 flush_work(&sbi->s_error_work);
2a4ae3bc 5549 del_timer_sync(&sbi->s_err_report);
618f0031 5550 ext4_stop_mmpd(sbi);
ac27a0ec 5551failed_mount2:
1d0c3924
TT
5552 rcu_read_lock();
5553 group_desc = rcu_dereference(sbi->s_group_desc);
ac27a0ec 5554 for (i = 0; i < db_count; i++)
1d0c3924
TT
5555 brelse(group_desc[i]);
5556 kvfree(group_desc);
5557 rcu_read_unlock();
ac27a0ec 5558failed_mount:
0441984a
DW
5559 if (sbi->s_chksum_driver)
5560 crypto_free_shash(sbi->s_chksum_driver);
c83ad55e
GKB
5561
5562#ifdef CONFIG_UNICODE
f8f4acb6 5563 utf8_unload(sb->s_encoding);
c83ad55e
GKB
5564#endif
5565
ac27a0ec 5566#ifdef CONFIG_QUOTA
a2d4a646 5567 for (i = 0; i < EXT4_MAXQUOTAS; i++)
0ba33fac 5568 kfree(get_qf_name(sb, sbi, i));
ac27a0ec 5569#endif
ac4acb1f 5570 fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
afd09b61 5571 /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
ac27a0ec 5572 brelse(bh);
afd09b61 5573 ext4_blkdev_remove(sbi);
ac27a0ec
DK
5574out_fail:
5575 sb->s_fs_info = NULL;
f6830165 5576 kfree(sbi->s_blockgroup_lock);
5aee0f8a 5577out_free_base:
ac27a0ec 5578 kfree(sbi);
d4c402d9 5579 kfree(orig_data);
5e405595 5580 fs_put_dax(dax_dev);
07aa2ea1 5581 return err ? err : ret;
ac27a0ec
DK
5582}
5583
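/*
 * Illustrative sketch (not part of super.c): the failed_mount* labels above
 * implement staged unwinding -- each label releases only what was set up
 * before the step that failed, in reverse order of acquisition. A minimal
 * standalone model of the same pattern, with hypothetical resource names:
 */
#include <stdlib.h>

struct demo_ctx { void *a, *b, *c; };

static int demo_setup(struct demo_ctx *ctx)
{
	ctx->a = malloc(16);
	if (!ctx->a)
		goto fail;
	ctx->b = malloc(16);
	if (!ctx->b)
		goto fail_a;
	ctx->c = malloc(16);
	if (!ctx->c)
		goto fail_b;
	return 0;		/* everything set up, like a successful mount */

fail_b:
	free(ctx->b);		/* undo step 2, as failed_mount6 releases mballoc */
fail_a:
	free(ctx->a);		/* undo step 1, as failed_mount2 drops group descriptors */
fail:
	return -1;		/* nothing to undo, like plain failed_mount */
}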
5584/*
5585 * Setup any per-fs journal parameters now. We'll do this both on
5586 * initial mount, once the journal has been initialised but before we've
5587 * done any recovery; and again on any subsequent remount.
5588 */
617ba13b 5589static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
ac27a0ec 5590{
617ba13b 5591 struct ext4_sb_info *sbi = EXT4_SB(sb);
ac27a0ec 5592
30773840
TT
5593 journal->j_commit_interval = sbi->s_commit_interval;
5594 journal->j_min_batch_time = sbi->s_min_batch_time;
5595 journal->j_max_batch_time = sbi->s_max_batch_time;
6866d7b3 5596 ext4_fc_init(sb, journal);
ac27a0ec 5597
a931da6a 5598 write_lock(&journal->j_state_lock);
ac27a0ec 5599 if (test_opt(sb, BARRIER))
dab291af 5600 journal->j_flags |= JBD2_BARRIER;
ac27a0ec 5601 else
dab291af 5602 journal->j_flags &= ~JBD2_BARRIER;
5bf5683a
HK
5603 if (test_opt(sb, DATA_ERR_ABORT))
5604 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5605 else
5606 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
a931da6a 5607 write_unlock(&journal->j_state_lock);
ac27a0ec
DK
5608}
5609
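/*
 * Illustrative sketch (not part of super.c): the journal parameters copied
 * above (commit interval, barrier mode, data_err behaviour) originate from
 * mount options. From user space they reach the mount-option parser as the
 * data argument of mount(2); device and mount point below are hypothetical.
 */
#include <sys/mount.h>

int demo_mount_ext4(void)
{
	return mount("/dev/sdb1", "/mnt/data", "ext4", 0,
		     "commit=15,barrier=1,data_err=abort");
}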
c6cb7e77
EW
5610static struct inode *ext4_get_journal_inode(struct super_block *sb,
5611 unsigned int journal_inum)
ac27a0ec
DK
5612{
5613 struct inode *journal_inode;
ac27a0ec 5614
c6cb7e77
EW
5615 /*
5616 * Test for the existence of a valid inode on disk. Bad things
5617 * happen if we iget() an unused inode, as the subsequent iput()
5618 * will try to delete it.
5619 */
8a363970 5620 journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
1d1fe1ee 5621 if (IS_ERR(journal_inode)) {
b31e1552 5622 ext4_msg(sb, KERN_ERR, "no journal found");
ac27a0ec
DK
5623 return NULL;
5624 }
5625 if (!journal_inode->i_nlink) {
5626 make_bad_inode(journal_inode);
5627 iput(journal_inode);
b31e1552 5628 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
ac27a0ec
DK
5629 return NULL;
5630 }
5631
e5f8eab8 5632 jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
ac27a0ec 5633 journal_inode, journal_inode->i_size);
1d1fe1ee 5634 if (!S_ISREG(journal_inode->i_mode)) {
b31e1552 5635 ext4_msg(sb, KERN_ERR, "invalid journal inode");
ac27a0ec
DK
5636 iput(journal_inode);
5637 return NULL;
5638 }
c6cb7e77
EW
5639 return journal_inode;
5640}
5641
5642static journal_t *ext4_get_journal(struct super_block *sb,
5643 unsigned int journal_inum)
5644{
5645 struct inode *journal_inode;
5646 journal_t *journal;
5647
11215630
JK
5648 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5649 return NULL;
c6cb7e77
EW
5650
5651 journal_inode = ext4_get_journal_inode(sb, journal_inum);
5652 if (!journal_inode)
5653 return NULL;
ac27a0ec 5654
dab291af 5655 journal = jbd2_journal_init_inode(journal_inode);
ac27a0ec 5656 if (!journal) {
b31e1552 5657 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
ac27a0ec
DK
5658 iput(journal_inode);
5659 return NULL;
5660 }
5661 journal->j_private = sb;
617ba13b 5662 ext4_init_journal_params(sb, journal);
ac27a0ec
DK
5663 return journal;
5664}
5665
617ba13b 5666static journal_t *ext4_get_dev_journal(struct super_block *sb,
ac27a0ec
DK
5667 dev_t j_dev)
5668{
2b2d6d01 5669 struct buffer_head *bh;
ac27a0ec 5670 journal_t *journal;
617ba13b
MC
5671 ext4_fsblk_t start;
5672 ext4_fsblk_t len;
ac27a0ec 5673 int hblock, blocksize;
617ba13b 5674 ext4_fsblk_t sb_block;
ac27a0ec 5675 unsigned long offset;
2b2d6d01 5676 struct ext4_super_block *es;
ac27a0ec
DK
5677 struct block_device *bdev;
5678
11215630
JK
5679 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5680 return NULL;
0390131b 5681
b31e1552 5682 bdev = ext4_blkdev_get(j_dev, sb);
ac27a0ec
DK
5683 if (bdev == NULL)
5684 return NULL;
5685
ac27a0ec 5686 blocksize = sb->s_blocksize;
e1defc4f 5687 hblock = bdev_logical_block_size(bdev);
ac27a0ec 5688 if (blocksize < hblock) {
b31e1552
ES
5689 ext4_msg(sb, KERN_ERR,
5690 "blocksize too small for journal device");
ac27a0ec
DK
5691 goto out_bdev;
5692 }
5693
617ba13b
MC
5694 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5695 offset = EXT4_MIN_BLOCK_SIZE % blocksize;
ac27a0ec
DK
5696 set_blocksize(bdev, blocksize);
5697 if (!(bh = __bread(bdev, sb_block, blocksize))) {
b31e1552
ES
5698 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5699 "external journal");
ac27a0ec
DK
5700 goto out_bdev;
5701 }
5702
2716b802 5703 es = (struct ext4_super_block *) (bh->b_data + offset);
617ba13b 5704 if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
ac27a0ec 5705 !(le32_to_cpu(es->s_feature_incompat) &
617ba13b 5706 EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
b31e1552
ES
5707 ext4_msg(sb, KERN_ERR, "external journal has "
5708 "bad superblock");
ac27a0ec
DK
5709 brelse(bh);
5710 goto out_bdev;
5711 }
5712
df4763be
DW
5713 if ((le32_to_cpu(es->s_feature_ro_compat) &
5714 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5715 es->s_checksum != ext4_superblock_csum(sb, es)) {
5716 ext4_msg(sb, KERN_ERR, "external journal has "
5717 "corrupt superblock");
5718 brelse(bh);
5719 goto out_bdev;
5720 }
5721
617ba13b 5722 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
b31e1552 5723 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
ac27a0ec
DK
5724 brelse(bh);
5725 goto out_bdev;
5726 }
5727
bd81d8ee 5728 len = ext4_blocks_count(es);
ac27a0ec
DK
5729 start = sb_block + 1;
5730 brelse(bh); /* we're done with the superblock */
5731
dab291af 5732 journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
ac27a0ec
DK
5733 start, len, blocksize);
5734 if (!journal) {
b31e1552 5735 ext4_msg(sb, KERN_ERR, "failed to create device journal");
ac27a0ec
DK
5736 goto out_bdev;
5737 }
5738 journal->j_private = sb;
2d069c08 5739 if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
b31e1552 5740 ext4_msg(sb, KERN_ERR, "I/O error on journal device");
ac27a0ec
DK
5741 goto out_journal;
5742 }
5743 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
b31e1552
ES
5744 ext4_msg(sb, KERN_ERR, "External journal has more than one "
5745 "user (unsupported) - %d",
ac27a0ec
DK
5746 be32_to_cpu(journal->j_superblock->s_nr_users));
5747 goto out_journal;
5748 }
ee7ed3aa 5749 EXT4_SB(sb)->s_journal_bdev = bdev;
617ba13b 5750 ext4_init_journal_params(sb, journal);
ac27a0ec 5751 return journal;
0b8e58a1 5752
ac27a0ec 5753out_journal:
dab291af 5754 jbd2_journal_destroy(journal);
ac27a0ec 5755out_bdev:
617ba13b 5756 ext4_blkdev_put(bdev);
ac27a0ec
DK
5757 return NULL;
5758}
5759
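/*
 * Illustrative sketch (not part of super.c): ext4_get_dev_journal() above
 * finds the external journal's superblock the same way the primary one is
 * found -- at byte offset 1024 (EXT4_MIN_BLOCK_SIZE) on the device,
 * whatever the block size in use. Standalone model of that arithmetic,
 * demo_* names hypothetical:
 */
struct demo_sb_loc { unsigned long block; unsigned long offset; };

static struct demo_sb_loc demo_locate_sb(unsigned int blocksize)
{
	struct demo_sb_loc loc = {
		.block  = 1024 / blocksize,	/* EXT4_MIN_BLOCK_SIZE / blocksize */
		.offset = 1024 % blocksize,	/* EXT4_MIN_BLOCK_SIZE % blocksize */
	};
	/* 1 KiB blocks: block 1, offset 0; 4 KiB blocks: block 0, offset 1024 */
	return loc;
}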
617ba13b
MC
5760static int ext4_load_journal(struct super_block *sb,
5761 struct ext4_super_block *es,
ac27a0ec
DK
5762 unsigned long journal_devnum)
5763{
5764 journal_t *journal;
5765 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5766 dev_t journal_dev;
5767 int err = 0;
5768 int really_read_only;
273108fa 5769 int journal_dev_ro;
ac27a0ec 5770
11215630
JK
5771 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5772 return -EFSCORRUPTED;
0390131b 5773
ac27a0ec
DK
5774 if (journal_devnum &&
5775 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
b31e1552
ES
5776 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5777 "numbers have changed");
ac27a0ec
DK
5778 journal_dev = new_decode_dev(journal_devnum);
5779 } else
5780 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5781
273108fa
LC
5782 if (journal_inum && journal_dev) {
5783 ext4_msg(sb, KERN_ERR,
5784 "filesystem has both journal inode and journal device!");
5785 return -EINVAL;
5786 }
5787
5788 if (journal_inum) {
5789 journal = ext4_get_journal(sb, journal_inum);
5790 if (!journal)
5791 return -EINVAL;
5792 } else {
5793 journal = ext4_get_dev_journal(sb, journal_dev);
5794 if (!journal)
5795 return -EINVAL;
5796 }
5797
5798 journal_dev_ro = bdev_read_only(journal->j_dev);
5799 really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5800
5801 if (journal_dev_ro && !sb_rdonly(sb)) {
5802 ext4_msg(sb, KERN_ERR,
5803 "journal device read-only, try mounting with '-o ro'");
5804 err = -EROFS;
5805 goto err_out;
5806 }
ac27a0ec
DK
5807
5808 /*
5809 * Are we loading a blank journal or performing recovery after a
5810 * crash? For recovery, we need to check in advance whether we
5811 * can get read-write access to the device.
5812 */
e2b911c5 5813 if (ext4_has_feature_journal_needs_recovery(sb)) {
bc98a42c 5814 if (sb_rdonly(sb)) {
b31e1552
ES
5815 ext4_msg(sb, KERN_INFO, "INFO: recovery "
5816 "required on readonly filesystem");
ac27a0ec 5817 if (really_read_only) {
b31e1552 5818 ext4_msg(sb, KERN_ERR, "write access "
d98bf8cd
SR
5819 "unavailable, cannot proceed "
5820 "(try mounting with noload)");
273108fa
LC
5821 err = -EROFS;
5822 goto err_out;
ac27a0ec 5823 }
b31e1552
ES
5824 ext4_msg(sb, KERN_INFO, "write access will "
5825 "be enabled during recovery");
ac27a0ec
DK
5826 }
5827 }
5828
90576c0b 5829 if (!(journal->j_flags & JBD2_BARRIER))
b31e1552 5830 ext4_msg(sb, KERN_INFO, "barriers disabled");
4776004f 5831
e2b911c5 5832 if (!ext4_has_feature_journal_needs_recovery(sb))
dab291af 5833 err = jbd2_journal_wipe(journal, !really_read_only);
1c13d5c0
TT
5834 if (!err) {
5835 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
5836 if (save)
5837 memcpy(save, ((char *) es) +
5838 EXT4_S_ERR_START, EXT4_S_ERR_LEN);
dab291af 5839 err = jbd2_journal_load(journal);
1c13d5c0
TT
5840 if (save)
5841 memcpy(((char *) es) + EXT4_S_ERR_START,
5842 save, EXT4_S_ERR_LEN);
5843 kfree(save);
5844 }
ac27a0ec
DK
5845
5846 if (err) {
b31e1552 5847 ext4_msg(sb, KERN_ERR, "error loading journal");
273108fa 5848 goto err_out;
ac27a0ec
DK
5849 }
5850
617ba13b 5851 EXT4_SB(sb)->s_journal = journal;
11215630
JK
5852 err = ext4_clear_journal_err(sb, es);
5853 if (err) {
5854 EXT4_SB(sb)->s_journal = NULL;
5855 jbd2_journal_destroy(journal);
5856 return err;
5857 }
ac27a0ec 5858
c41303ce 5859 if (!really_read_only && journal_devnum &&
ac27a0ec
DK
5860 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5861 es->s_journal_dev = cpu_to_le32(journal_devnum);
ac27a0ec
DK
5862
5863 /* Make sure we flush the recovery flag to disk. */
4392fbc4 5864 ext4_commit_super(sb);
ac27a0ec
DK
5865 }
5866
5867 return 0;
273108fa
LC
5868
5869err_out:
5870 jbd2_journal_destroy(journal);
5871 return err;
ac27a0ec
DK
5872}
5873
2d01ddc8
JK
5874/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
5875static void ext4_update_super(struct super_block *sb)
ac27a0ec 5876{
c92dc856 5877 struct ext4_sb_info *sbi = EXT4_SB(sb);
e92ad03f
JK
5878 struct ext4_super_block *es = sbi->s_es;
5879 struct buffer_head *sbh = sbi->s_sbh;
a17712c8 5880
05c2c00f 5881 lock_buffer(sbh);
71290b36
TT
5882 /*
5883 * If the file system is mounted read-only, don't update the
5884 * superblock write time. This avoids updating the superblock
5885 * write time when we are mounting the root file system
5886 * read/only but we need to replay the journal; at that point,
5887 * for people who are east of GMT and who make their clock
5888 * tick in localtime for Windows bug-for-bug compatibility,
5889 * the clock is set in the future, and this will cause e2fsck
5890 * to complain and force a full file system check.
5891 */
1751e8a6 5892 if (!(sb->s_flags & SB_RDONLY))
6a0678a7 5893 ext4_update_tstamp(es, s_wtime);
8446fe92 5894 es->s_kbytes_written =
0bc9bc1d 5895 cpu_to_le64(sbi->s_kbytes_written +
8446fe92 5896 ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
0bc9bc1d 5897 sbi->s_sectors_written_start) >> 1));
e92ad03f 5898 if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
d5e03cbb 5899 ext4_free_blocks_count_set(es,
e92ad03f
JK
5900 EXT4_C2B(sbi, percpu_counter_sum_positive(
5901 &sbi->s_freeclusters_counter)));
5902 if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
d5e03cbb
TT
5903 es->s_free_inodes_count =
5904 cpu_to_le32(percpu_counter_sum_positive(
e92ad03f 5905 &sbi->s_freeinodes_counter));
c92dc856
JK
5906 /* Copy error information to the on-disk superblock */
5907 spin_lock(&sbi->s_error_lock);
5908 if (sbi->s_add_error_count > 0) {
5909 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
5910 if (!es->s_first_error_time && !es->s_first_error_time_hi) {
5911 __ext4_update_tstamp(&es->s_first_error_time,
5912 &es->s_first_error_time_hi,
5913 sbi->s_first_error_time);
5914 strncpy(es->s_first_error_func, sbi->s_first_error_func,
5915 sizeof(es->s_first_error_func));
5916 es->s_first_error_line =
5917 cpu_to_le32(sbi->s_first_error_line);
5918 es->s_first_error_ino =
5919 cpu_to_le32(sbi->s_first_error_ino);
5920 es->s_first_error_block =
5921 cpu_to_le64(sbi->s_first_error_block);
5922 es->s_first_error_errcode =
5923 ext4_errno_to_code(sbi->s_first_error_code);
5924 }
5925 __ext4_update_tstamp(&es->s_last_error_time,
5926 &es->s_last_error_time_hi,
5927 sbi->s_last_error_time);
5928 strncpy(es->s_last_error_func, sbi->s_last_error_func,
5929 sizeof(es->s_last_error_func));
5930 es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
5931 es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
5932 es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
5933 es->s_last_error_errcode =
5934 ext4_errno_to_code(sbi->s_last_error_code);
5935 /*
5936 * Start the daily error reporting function if it hasn't been
5937 * started already
5938 */
5939 if (!es->s_error_count)
5940 mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
5941 le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
5942 sbi->s_add_error_count = 0;
5943 }
5944 spin_unlock(&sbi->s_error_lock);
5945
06db49e6 5946 ext4_superblock_csum_set(sb);
2d01ddc8
JK
5947 unlock_buffer(sbh);
5948}
5949
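/*
 * Illustrative sketch (not part of super.c): ext4_update_super() above keeps
 * every on-disk field in little-endian form, which is why the error count is
 * bumped with le32_add_cpu() instead of a plain '+='. A standalone model of
 * that helper (demo_* names are hypothetical):
 */
#include <stdint.h>

static inline uint32_t demo_le32_to_cpu(uint32_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return __builtin_bswap32(v);		/* swap on big-endian hosts */
#else
	return v;				/* no-op on little-endian hosts */
#endif
}

static inline uint32_t demo_cpu_to_le32(uint32_t v)
{
	return demo_le32_to_cpu(v);		/* the byte swap is its own inverse */
}

/* Equivalent of le32_add_cpu(): add a CPU-order value to an LE on-disk field. */
static inline void demo_le32_add_cpu(uint32_t *field, uint32_t add)
{
	*field = demo_cpu_to_le32(demo_le32_to_cpu(*field) + add);
}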
5950static int ext4_commit_super(struct super_block *sb)
5951{
5952 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
5953 int error = 0;
5954
f88f1466
FC
5955 if (!sbh)
5956 return -EINVAL;
5957 if (block_device_ejected(sb))
5958 return -ENODEV;
2d01ddc8
JK
5959
5960 ext4_update_super(sb);
5961
e8680786 5962 if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
4743f839
PKS
5963 /*
5964 * Oh, dear. A previous attempt to write the
5965 * superblock failed. This could happen because the
5966 * USB device was yanked out. Or it could happen to
5967 * be a transient write error and maybe the block will
5968 * be remapped. Nothing we can do but to retry the
5969 * write and hope for the best.
5970 */
5971 ext4_msg(sb, KERN_ERR, "previous I/O error to "
5972 "superblock detected");
5973 clear_buffer_write_io_error(sbh);
5974 set_buffer_uptodate(sbh);
5975 }
2d01ddc8 5976 BUFFER_TRACE(sbh, "marking dirty");
ac27a0ec 5977 mark_buffer_dirty(sbh);
4392fbc4
JK
5978 error = __sync_dirty_buffer(sbh,
5979 REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
5980 if (buffer_write_io_error(sbh)) {
5981 ext4_msg(sb, KERN_ERR, "I/O error while writing "
5982 "superblock");
5983 clear_buffer_write_io_error(sbh);
5984 set_buffer_uptodate(sbh);
914258bf 5985 }
c4be0c1d 5986 return error;
ac27a0ec
DK
5987}
5988
ac27a0ec
DK
5989/*
5990 * Have we just finished recovery? If so, and if we are mounting (or
5991 * remounting) the filesystem readonly, then we will end up with a
5992 * consistent fs on disk. Record that fact.
5993 */
11215630
JK
5994static int ext4_mark_recovery_complete(struct super_block *sb,
5995 struct ext4_super_block *es)
ac27a0ec 5996{
11215630 5997 int err;
617ba13b 5998 journal_t *journal = EXT4_SB(sb)->s_journal;
ac27a0ec 5999
e2b911c5 6000 if (!ext4_has_feature_journal(sb)) {
11215630
JK
6001 if (journal != NULL) {
6002 ext4_error(sb, "Journal got removed while the fs was "
6003 "mounted!");
6004 return -EFSCORRUPTED;
6005 }
6006 return 0;
0390131b 6007 }
dab291af 6008 jbd2_journal_lock_updates(journal);
01d5d965 6009 err = jbd2_journal_flush(journal, 0);
11215630 6010 if (err < 0)
7ffe1ea8
HK
6011 goto out;
6012
02f310fc
JK
6013 if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6014 ext4_has_feature_orphan_present(sb))) {
6015 if (!ext4_orphan_file_empty(sb)) {
6016 ext4_error(sb, "Orphan file not empty on read-only fs.");
6017 err = -EFSCORRUPTED;
6018 goto out;
6019 }
e2b911c5 6020 ext4_clear_feature_journal_needs_recovery(sb);
02f310fc 6021 ext4_clear_feature_orphan_present(sb);
4392fbc4 6022 ext4_commit_super(sb);
ac27a0ec 6023 }
7ffe1ea8 6024out:
dab291af 6025 jbd2_journal_unlock_updates(journal);
11215630 6026 return err;
ac27a0ec
DK
6027}
6028
6029/*
6030 * If we are mounting (or read-write remounting) a filesystem whose journal
6031 * has recorded an error from a previous lifetime, move that error to the
6032 * main filesystem now.
6033 */
11215630 6034static int ext4_clear_journal_err(struct super_block *sb,
2b2d6d01 6035 struct ext4_super_block *es)
ac27a0ec
DK
6036{
6037 journal_t *journal;
6038 int j_errno;
6039 const char *errstr;
6040
11215630
JK
6041 if (!ext4_has_feature_journal(sb)) {
6042 ext4_error(sb, "Journal got removed while the fs was mounted!");
6043 return -EFSCORRUPTED;
6044 }
0390131b 6045
617ba13b 6046 journal = EXT4_SB(sb)->s_journal;
ac27a0ec
DK
6047
6048 /*
6049 * Now check for any error status which may have been recorded in the
617ba13b 6050 * journal by a prior ext4_error() or ext4_abort()
ac27a0ec
DK
6051 */
6052
dab291af 6053 j_errno = jbd2_journal_errno(journal);
ac27a0ec
DK
6054 if (j_errno) {
6055 char nbuf[16];
6056
617ba13b 6057 errstr = ext4_decode_error(sb, j_errno, nbuf);
12062ddd 6058 ext4_warning(sb, "Filesystem error recorded "
ac27a0ec 6059 "from previous mount: %s", errstr);
12062ddd 6060 ext4_warning(sb, "Marking fs in need of filesystem check.");
ac27a0ec 6061
617ba13b
MC
6062 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6063 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
4392fbc4 6064 ext4_commit_super(sb);
ac27a0ec 6065
dab291af 6066 jbd2_journal_clear_err(journal);
d796c52e 6067 jbd2_journal_update_sb_errno(journal);
ac27a0ec 6068 }
11215630 6069 return 0;
ac27a0ec
DK
6070}
6071
6072/*
6073 * Force the running and committing transactions to commit,
6074 * and wait on the commit.
6075 */
617ba13b 6076int ext4_force_commit(struct super_block *sb)
ac27a0ec
DK
6077{
6078 journal_t *journal;
ac27a0ec 6079
bc98a42c 6080 if (sb_rdonly(sb))
ac27a0ec
DK
6081 return 0;
6082
617ba13b 6083 journal = EXT4_SB(sb)->s_journal;
b1deefc9 6084 return ext4_journal_force_commit(journal);
ac27a0ec
DK
6085}
6086
617ba13b 6087static int ext4_sync_fs(struct super_block *sb, int wait)
ac27a0ec 6088{
14ce0cb4 6089 int ret = 0;
9eddacf9 6090 tid_t target;
06a407f1 6091 bool needs_barrier = false;
8d5d02e6 6092 struct ext4_sb_info *sbi = EXT4_SB(sb);
ac27a0ec 6093
49598e04 6094 if (unlikely(ext4_forced_shutdown(sbi)))
0db1ff22
TT
6095 return 0;
6096
9bffad1e 6097 trace_ext4_sync_fs(sb, wait);
2e8fa54e 6098 flush_workqueue(sbi->rsv_conversion_wq);
a1177825
JK
6099 /*
6100 * Writeback quota in non-journalled quota case - journalled quota has
6101 * no dirty dquots
6102 */
6103 dquot_writeback_dquots(sb, -1);
06a407f1
DM
6104 /*
6105 * Data writeback is possible w/o a journal transaction, so a barrier must
6106 * be sent at the end of the function. But we can skip it if
6107 * transaction_commit will do it for us.
6108 */
bda32530
TT
6109 if (sbi->s_journal) {
6110 target = jbd2_get_latest_transaction(sbi->s_journal);
6111 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6112 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6113 needs_barrier = true;
6114
6115 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6116 if (wait)
6117 ret = jbd2_log_wait_commit(sbi->s_journal,
6118 target);
6119 }
6120 } else if (wait && test_opt(sb, BARRIER))
06a407f1 6121 needs_barrier = true;
06a407f1
DM
6122 if (needs_barrier) {
6123 int err;
c6bf3f0e 6124 err = blkdev_issue_flush(sb->s_bdev);
06a407f1
DM
6125 if (!ret)
6126 ret = err;
0390131b 6127 }
06a407f1
DM
6128
6129 return ret;
6130}
6131
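/*
 * Illustrative sketch (not part of super.c): ext4_sync_fs() is invoked via
 * the VFS (sync_filesystem and friends), for example when user space calls
 * syncfs(2) on an open descriptor belonging to the filesystem. The mount
 * point path below is hypothetical.
 */
#include <fcntl.h>
#include <unistd.h>

int demo_syncfs(void)
{
	int ret, fd = open("/mnt/data", O_RDONLY);

	if (fd < 0)
		return -1;
	ret = syncfs(fd);	/* eventually reaches ext4_sync_fs() */
	close(fd);
	return ret;
}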
ac27a0ec
DK
6132/*
6133 * LVM calls this function before a (read-only) snapshot is created. This
6134 * gives us a chance to flush the journal completely and mark the fs clean.
be4f27d3
YY
6135 *
6136 * Note that this function alone cannot bring the filesystem to a clean
8e8ad8a5
JK
6137 * state; it relies on the upper layer to stop all data & metadata
6138 * modifications.
ac27a0ec 6139 */
c4be0c1d 6140static int ext4_freeze(struct super_block *sb)
ac27a0ec 6141{
c4be0c1d
TS
6142 int error = 0;
6143 journal_t *journal;
ac27a0ec 6144
bc98a42c 6145 if (sb_rdonly(sb))
9ca92389 6146 return 0;
ac27a0ec 6147
9ca92389 6148 journal = EXT4_SB(sb)->s_journal;
7ffe1ea8 6149
bb044576
TT
6150 if (journal) {
6151 /* Now we set up the journal barrier. */
6152 jbd2_journal_lock_updates(journal);
ac27a0ec 6153
bb044576
TT
6154 /*
6155 * Don't clear the needs_recovery flag if we failed to
6156 * flush the journal.
6157 */
01d5d965 6158 error = jbd2_journal_flush(journal, 0);
bb044576
TT
6159 if (error < 0)
6160 goto out;
c642dc9e
ES
6161
6162 /* Journal blocked and flushed, clear needs_recovery flag. */
e2b911c5 6163 ext4_clear_feature_journal_needs_recovery(sb);
02f310fc
JK
6164 if (ext4_orphan_file_empty(sb))
6165 ext4_clear_feature_orphan_present(sb);
bb044576 6166 }
9ca92389 6167
4392fbc4 6168 error = ext4_commit_super(sb);
6b0310fb 6169out:
bb044576
TT
6170 if (journal)
6171 /* we rely on upper layer to stop further updates */
6172 jbd2_journal_unlock_updates(journal);
6b0310fb 6173 return error;
ac27a0ec
DK
6174}
6175
6176/*
6177 * Called by LVM after the snapshot is done. We need to reset the RECOVER
6178 * flag here, even though the filesystem is not technically dirty yet.
6179 */
c4be0c1d 6180static int ext4_unfreeze(struct super_block *sb)
ac27a0ec 6181{
bc98a42c 6182 if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
9ca92389
TT
6183 return 0;
6184
c642dc9e
ES
6185 if (EXT4_SB(sb)->s_journal) {
6186 /* Reset the needs_recovery flag before the fs is unlocked. */
e2b911c5 6187 ext4_set_feature_journal_needs_recovery(sb);
02f310fc
JK
6188 if (ext4_has_feature_orphan_file(sb))
6189 ext4_set_feature_orphan_present(sb);
c642dc9e
ES
6190 }
6191
4392fbc4 6192 ext4_commit_super(sb);
c4be0c1d 6193 return 0;
ac27a0ec
DK
6194}
6195
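/*
 * Illustrative sketch (not part of super.c): ext4_freeze()/ext4_unfreeze()
 * above are reached through the VFS freeze path; user space can drive the
 * same path with the FIFREEZE/FITHAW ioctls on the mount point, which is
 * what fsfreeze(8) does. Error handling trimmed, mount point hypothetical.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int demo_freeze_thaw(const char *mountpoint)
{
	int ret, fd = open(mountpoint, O_RDONLY);

	if (fd < 0)
		return -1;
	ret = ioctl(fd, FIFREEZE, 0);		/* ends up in ext4_freeze() */
	if (!ret)
		ret = ioctl(fd, FITHAW, 0);	/* ends up in ext4_unfreeze() */
	close(fd);
	return ret;
}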
673c6100
TT
6196/*
6197 * Structure to save mount options for ext4_remount's benefit
6198 */
6199struct ext4_mount_options {
6200 unsigned long s_mount_opt;
a2595b8a 6201 unsigned long s_mount_opt2;
08cefc7a
EB
6202 kuid_t s_resuid;
6203 kgid_t s_resgid;
673c6100
TT
6204 unsigned long s_commit_interval;
6205 u32 s_min_batch_time, s_max_batch_time;
6206#ifdef CONFIG_QUOTA
6207 int s_jquota_fmt;
a2d4a646 6208 char *s_qf_names[EXT4_MAXQUOTAS];
673c6100
TT
6209#endif
6210};
6211
2b2d6d01 6212static int ext4_remount(struct super_block *sb, int *flags, char *data)
ac27a0ec 6213{
2b2d6d01 6214 struct ext4_super_block *es;
617ba13b 6215 struct ext4_sb_info *sbi = EXT4_SB(sb);
f25391eb 6216 unsigned long old_sb_flags, vfs_flags;
617ba13b 6217 struct ext4_mount_options old_opts;
8a266467 6218 ext4_group_t g;
c5e06d10 6219 int err = 0;
ac27a0ec 6220#ifdef CONFIG_QUOTA
3bbef91b 6221 int enable_quota = 0;
03dafb5f 6222 int i, j;
33458eab 6223 char *to_free[EXT4_MAXQUOTAS];
ac27a0ec 6224#endif
d4c402d9 6225 char *orig_data = kstrdup(data, GFP_KERNEL);
461c3af0 6226 struct ext4_fs_context parsed_opts;
b237e304
HS
6227
6228 parsed_opts.journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6229 parsed_opts.journal_devnum = 0;
ac27a0ec 6230
21ac738e
CX
6231 if (data && !orig_data)
6232 return -ENOMEM;
6233
ac27a0ec
DK
6234 /* Store the original options */
6235 old_sb_flags = sb->s_flags;
6236 old_opts.s_mount_opt = sbi->s_mount_opt;
a2595b8a 6237 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
ac27a0ec
DK
6238 old_opts.s_resuid = sbi->s_resuid;
6239 old_opts.s_resgid = sbi->s_resgid;
6240 old_opts.s_commit_interval = sbi->s_commit_interval;
30773840
TT
6241 old_opts.s_min_batch_time = sbi->s_min_batch_time;
6242 old_opts.s_max_batch_time = sbi->s_max_batch_time;
ac27a0ec
DK
6243#ifdef CONFIG_QUOTA
6244 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
a2d4a646 6245 for (i = 0; i < EXT4_MAXQUOTAS; i++)
03dafb5f 6246 if (sbi->s_qf_names[i]) {
33458eab
TT
6247 char *qf_name = get_qf_name(sb, sbi, i);
6248
6249 old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
03dafb5f
CG
6250 if (!old_opts.s_qf_names[i]) {
6251 for (j = 0; j < i; j++)
6252 kfree(old_opts.s_qf_names[j]);
3e36a163 6253 kfree(orig_data);
03dafb5f
CG
6254 return -ENOMEM;
6255 }
6256 } else
6257 old_opts.s_qf_names[i] = NULL;
ac27a0ec 6258#endif
b3881f74 6259 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
b237e304
HS
6260 parsed_opts.journal_ioprio =
6261 sbi->s_journal->j_task->io_context->ioprio;
ac27a0ec 6262
f25391eb
LC
6263 /*
6264 * Some options can be enabled by ext4 and/or by the VFS mount flag;
6265 * either way we need to make sure it matches in both *flags and
6266 * s_flags. Copy those selected flags from *flags to s_flags.
6267 */
6268 vfs_flags = SB_LAZYTIME | SB_I_VERSION;
6269 sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
6270
b237e304 6271 if (!parse_options(data, sb, &parsed_opts, 1)) {
ac27a0ec
DK
6272 err = -EINVAL;
6273 goto restore_opts;
6274 }
6275
6b992ff2 6276 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
c6d3d56d
DW
6277 test_opt(sb, JOURNAL_CHECKSUM)) {
6278 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
2d5b86e0
ES
6279 "during remount not supported; ignoring");
6280 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6b992ff2
DW
6281 }
6282
6ae6514b
PS
6283 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6284 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6285 ext4_msg(sb, KERN_ERR, "can't mount with "
6286 "both data=journal and delalloc");
6287 err = -EINVAL;
6288 goto restore_opts;
6289 }
6290 if (test_opt(sb, DIOREAD_NOLOCK)) {
6291 ext4_msg(sb, KERN_ERR, "can't mount with "
6292 "both data=journal and dioread_nolock");
6293 err = -EINVAL;
6294 goto restore_opts;
6295 }
ab04df78
JK
6296 } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6297 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6298 ext4_msg(sb, KERN_ERR, "can't mount with "
6299 "journal_async_commit in data=ordered mode");
6300 err = -EINVAL;
6301 goto restore_opts;
6302 }
923ae0ff
RZ
6303 }
6304
cdb7ee4c
TE
6305 if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6306 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6307 err = -EINVAL;
6308 goto restore_opts;
6309 }
6310
9b5f6c9b 6311 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
124e7c61 6312 ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
ac27a0ec 6313
1751e8a6
LT
6314 sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6315 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
ac27a0ec
DK
6316
6317 es = sbi->s_es;
6318
b3881f74 6319 if (sbi->s_journal) {
0390131b 6320 ext4_init_journal_params(sb, sbi->s_journal);
b237e304 6321 set_task_ioprio(sbi->s_journal->j_task, parsed_opts.journal_ioprio);
b3881f74 6322 }
ac27a0ec 6323
c92dc856
JK
6324 /* Flush outstanding errors before changing fs state */
6325 flush_work(&sbi->s_error_work);
6326
1751e8a6 6327 if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
9b5f6c9b 6328 if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
ac27a0ec
DK
6329 err = -EROFS;
6330 goto restore_opts;
6331 }
6332
1751e8a6 6333 if (*flags & SB_RDONLY) {
38c03b34
TT
6334 err = sync_filesystem(sb);
6335 if (err < 0)
6336 goto restore_opts;
0f0dd62f
CH
6337 err = dquot_suspend(sb, -1);
6338 if (err < 0)
c79d967d 6339 goto restore_opts;
c79d967d 6340
ac27a0ec
DK
6341 /*
6342 * First of all, the unconditional stuff we have to do
6343 * to disable replay of the journal when we next remount
6344 */
1751e8a6 6345 sb->s_flags |= SB_RDONLY;
ac27a0ec
DK
6346
6347 /*
6348 * OK, test if we are remounting a valid rw partition
6349 * readonly, and if so set the rdonly flag and then
6350 * mark the partition as valid again.
6351 */
617ba13b
MC
6352 if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6353 (sbi->s_mount_state & EXT4_VALID_FS))
ac27a0ec
DK
6354 es->s_state = cpu_to_le16(sbi->s_mount_state);
6355
11215630
JK
6356 if (sbi->s_journal) {
6357 /*
6358 * We let remount-ro finish even if marking fs
6359 * as clean failed...
6360 */
0390131b 6361 ext4_mark_recovery_complete(sb, es);
11215630 6362 }
ac27a0ec 6363 } else {
a13fb1a4 6364 /* Make sure we can mount this feature set readwrite */
e2b911c5 6365 if (ext4_has_feature_readonly(sb) ||
2cb5cc8b 6366 !ext4_feature_set_ok(sb, 0)) {
ac27a0ec
DK
6367 err = -EROFS;
6368 goto restore_opts;
6369 }
8a266467
TT
6370 /*
6371 * Make sure the group descriptor checksums
0b8e58a1 6372 * are sane. If they aren't, refuse to remount r/w.
8a266467
TT
6373 */
6374 for (g = 0; g < sbi->s_groups_count; g++) {
6375 struct ext4_group_desc *gdp =
6376 ext4_get_group_desc(sb, g, NULL);
6377
feb0ab32 6378 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
b31e1552
ES
6379 ext4_msg(sb, KERN_ERR,
6380 "ext4_remount: Checksum for group %u failed (%u!=%u)",
e2b911c5 6381 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
8a266467 6382 le16_to_cpu(gdp->bg_checksum));
6a797d27 6383 err = -EFSBADCRC;
8a266467
TT
6384 goto restore_opts;
6385 }
6386 }
6387
ead6596b
ES
6388 /*
6389 * If we have an unprocessed orphan list hanging
6390 * around from a previously readonly bdev mount,
6391 * require a full umount/remount for now.
6392 */
02f310fc 6393 if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
b31e1552 6394 ext4_msg(sb, KERN_WARNING, "Couldn't "
ead6596b
ES
6395 "remount RDWR because of unprocessed "
6396 "orphan inode list. Please "
b31e1552 6397 "umount/remount instead");
ead6596b
ES
6398 err = -EINVAL;
6399 goto restore_opts;
6400 }
6401
ac27a0ec
DK
6402 /*
6403 * Mounting a RDONLY partition read-write, so reread
6404 * and store the current valid flag. (It may have
6405 * been changed by e2fsck since we originally mounted
6406 * the partition.)
6407 */
11215630
JK
6408 if (sbi->s_journal) {
6409 err = ext4_clear_journal_err(sb, es);
6410 if (err)
6411 goto restore_opts;
6412 }
ac27a0ec 6413 sbi->s_mount_state = le16_to_cpu(es->s_state);
c89128a0
JK
6414
6415 err = ext4_setup_super(sb, es, 0);
6416 if (err)
6417 goto restore_opts;
6418
6419 sb->s_flags &= ~SB_RDONLY;
e2b911c5 6420 if (ext4_has_feature_mmp(sb))
c5e06d10
JL
6421 if (ext4_multi_mount_protect(sb,
6422 le64_to_cpu(es->s_mmp_block))) {
6423 err = -EROFS;
6424 goto restore_opts;
6425 }
3bbef91b 6426#ifdef CONFIG_QUOTA
c79d967d 6427 enable_quota = 1;
3bbef91b 6428#endif
ac27a0ec
DK
6429 }
6430 }
bfff6873
LC
6431
6432 /*
6433 * Reinitialize lazy itable initialization thread based on
6434 * current settings
6435 */
bc98a42c 6436 if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
bfff6873
LC
6437 ext4_unregister_li_request(sb);
6438 else {
6439 ext4_group_t first_not_zeroed;
6440 first_not_zeroed = ext4_has_uninit_itable(sb);
6441 ext4_register_li_request(sb, first_not_zeroed);
6442 }
6443
0f5bde1d
JK
6444 /*
6445 * Handle creation of system zone data early because it can fail.
6446 * Releasing of existing data is done when we are sure remount will
6447 * succeed.
6448 */
dd0db94f 6449 if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
0f5bde1d
JK
6450 err = ext4_setup_system_zone(sb);
6451 if (err)
6452 goto restore_opts;
6453 }
d176b1f6 6454
c89128a0 6455 if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
4392fbc4 6456 err = ext4_commit_super(sb);
c89128a0
JK
6457 if (err)
6458 goto restore_opts;
6459 }
0390131b 6460
ac27a0ec
DK
6461#ifdef CONFIG_QUOTA
6462 /* Release old quota file names */
a2d4a646 6463 for (i = 0; i < EXT4_MAXQUOTAS; i++)
03dafb5f 6464 kfree(old_opts.s_qf_names[i]);
7c319d32
AK
6465 if (enable_quota) {
6466 if (sb_any_quota_suspended(sb))
6467 dquot_resume(sb, -1);
e2b911c5 6468 else if (ext4_has_feature_quota(sb)) {
7c319d32 6469 err = ext4_enable_quotas(sb);
07724f98 6470 if (err)
7c319d32 6471 goto restore_opts;
7c319d32
AK
6472 }
6473 }
ac27a0ec 6474#endif
dd0db94f 6475 if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
0f5bde1d 6476 ext4_release_system_zone(sb);
d4c402d9 6477
61bb4a1c
TT
6478 if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6479 ext4_stop_mmpd(sbi);
6480
f25391eb
LC
6481 /*
6482 * Some options can be enabled by ext4 and/or by the VFS mount flag;
6483 * either way we need to make sure it matches in both *flags and
6484 * s_flags. Copy those selected flags from s_flags to *flags.
6485 */
6486 *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
d4c402d9 6487
ca9b404f
RA
6488 ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.",
6489 orig_data, ext4_quota_mode(sb));
d4c402d9 6490 kfree(orig_data);
ac27a0ec 6491 return 0;
0b8e58a1 6492
ac27a0ec
DK
6493restore_opts:
6494 sb->s_flags = old_sb_flags;
6495 sbi->s_mount_opt = old_opts.s_mount_opt;
a2595b8a 6496 sbi->s_mount_opt2 = old_opts.s_mount_opt2;
ac27a0ec
DK
6497 sbi->s_resuid = old_opts.s_resuid;
6498 sbi->s_resgid = old_opts.s_resgid;
6499 sbi->s_commit_interval = old_opts.s_commit_interval;
30773840
TT
6500 sbi->s_min_batch_time = old_opts.s_min_batch_time;
6501 sbi->s_max_batch_time = old_opts.s_max_batch_time;
dd0db94f 6502 if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
0f5bde1d 6503 ext4_release_system_zone(sb);
ac27a0ec
DK
6504#ifdef CONFIG_QUOTA
6505 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
a2d4a646 6506 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
33458eab
TT
6507 to_free[i] = get_qf_name(sb, sbi, i);
6508 rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
ac27a0ec 6509 }
33458eab
TT
6510 synchronize_rcu();
6511 for (i = 0; i < EXT4_MAXQUOTAS; i++)
6512 kfree(to_free[i]);
ac27a0ec 6513#endif
61bb4a1c
TT
6514 if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6515 ext4_stop_mmpd(sbi);
d4c402d9 6516 kfree(orig_data);
ac27a0ec
DK
6517 return err;
6518}
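
For context, ext4_remount() is entered from user space through mount(2) with MS_REMOUNT (what `mount -o remount,...` issues); the options saved in ext4_mount_options above are restored if any step fails. A minimal sketch that bounces a filesystem to read-only and back, with /mnt as a placeholder mount point:

/* Minimal sketch: exercise ext4_remount() from user space.
 * MS_REMOUNT | MS_RDONLY takes the rw->ro branch (journal marked clean);
 * MS_REMOUNT alone takes the ro->rw branch (group descriptor checksum and
 * orphan list checks). "/mnt" is a placeholder; needs CAP_SYS_ADMIN.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        if (mount("", "/mnt", "", MS_REMOUNT | MS_RDONLY, "") < 0)
                perror("remount ro");
        if (mount("", "/mnt", "", MS_REMOUNT, "") < 0)
                perror("remount rw");
        return 0;
}
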
6519
689c958c
LX
6520#ifdef CONFIG_QUOTA
6521static int ext4_statfs_project(struct super_block *sb,
6522 kprojid_t projid, struct kstatfs *buf)
6523{
6524 struct kqid qid;
6525 struct dquot *dquot;
6526 u64 limit;
6527 u64 curblock;
6528
6529 qid = make_kqid_projid(projid);
6530 dquot = dqget(sb, qid);
6531 if (IS_ERR(dquot))
6532 return PTR_ERR(dquot);
7b9ca4c6 6533 spin_lock(&dquot->dq_dqb_lock);
689c958c 6534
a08fe66e
CX
6535 limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6536 dquot->dq_dqb.dqb_bhardlimit);
57c32ea4
CX
6537 limit >>= sb->s_blocksize_bits;
6538
689c958c 6539 if (limit && buf->f_blocks > limit) {
f06925c7
KK
6540 curblock = (dquot->dq_dqb.dqb_curspace +
6541 dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
689c958c
LX
6542 buf->f_blocks = limit;
6543 buf->f_bfree = buf->f_bavail =
6544 (buf->f_blocks > curblock) ?
6545 (buf->f_blocks - curblock) : 0;
6546 }
6547
a08fe66e
CX
6548 limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6549 dquot->dq_dqb.dqb_ihardlimit);
689c958c
LX
6550 if (limit && buf->f_files > limit) {
6551 buf->f_files = limit;
6552 buf->f_ffree =
6553 (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
6554 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
6555 }
6556
7b9ca4c6 6557 spin_unlock(&dquot->dq_dqb_lock);
689c958c
LX
6558 dqput(dquot);
6559 return 0;
6560}
6561#endif
6562
2b2d6d01 6563static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
ac27a0ec
DK
6564{
6565 struct super_block *sb = dentry->d_sb;
617ba13b
MC
6566 struct ext4_sb_info *sbi = EXT4_SB(sb);
6567 struct ext4_super_block *es = sbi->s_es;
27dd4385 6568 ext4_fsblk_t overhead = 0, resv_blocks;
d02a9391 6569 s64 bfree;
27dd4385 6570 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
ac27a0ec 6571
952fc18e
TT
6572 if (!test_opt(sb, MINIX_DF))
6573 overhead = sbi->s_overhead;
ac27a0ec 6574
617ba13b 6575 buf->f_type = EXT4_SUPER_MAGIC;
ac27a0ec 6576 buf->f_bsize = sb->s_blocksize;
b72f78cb 6577 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
57042651
TT
6578 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6579 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
d02a9391 6580 /* prevent underflow in case only a little free space is available */
57042651 6581 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
27dd4385
LC
6582 buf->f_bavail = buf->f_bfree -
6583 (ext4_r_blocks_count(es) + resv_blocks);
6584 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
ac27a0ec
DK
6585 buf->f_bavail = 0;
6586 buf->f_files = le32_to_cpu(es->s_inodes_count);
52d9f3b4 6587 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
617ba13b 6588 buf->f_namelen = EXT4_NAME_LEN;
9591c3a3 6589 buf->f_fsid = uuid_to_fsid(es->s_uuid);
0b8e58a1 6590
689c958c
LX
6591#ifdef CONFIG_QUOTA
6592 if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6593 sb_has_quota_limits_enabled(sb, PRJQUOTA))
6594 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6595#endif
ac27a0ec
DK
6596 return 0;
6597}
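
The fields filled in above are what statfs(2)/statvfs(3) report for an ext4 mount; note that f_bavail is f_bfree minus the root-reserved blocks and the internal reserved clusters, so it can be noticeably smaller. A small user-space sketch, with /mnt as a placeholder path:

/* Minimal sketch: read the fields ext4_statfs() fills in.
 * f_bfree counts all free blocks; f_bavail additionally subtracts the
 * reserved blocks, so it is what unprivileged users can actually allocate.
 */
#include <stdio.h>
#include <sys/vfs.h>

int main(void)
{
        struct statfs st;

        if (statfs("/mnt", &st) < 0) {
                perror("statfs");
                return 1;
        }
        printf("block size      %ld\n", (long)st.f_bsize);
        printf("total blocks    %llu\n", (unsigned long long)st.f_blocks);
        printf("free blocks     %llu\n", (unsigned long long)st.f_bfree);
        printf("avail to users  %llu\n", (unsigned long long)st.f_bavail);
        printf("free inodes     %llu\n", (unsigned long long)st.f_ffree);
        return 0;
}
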
6598
ac27a0ec
DK
6599
6600#ifdef CONFIG_QUOTA
6601
bc8230ee
JK
6602/*
6603 * Helper functions so that the transaction is started before we acquire
6604 * dqio_sem, keeping the correct lock ordering of transaction > dqio_sem.
6605 */
ac27a0ec
DK
6606static inline struct inode *dquot_to_inode(struct dquot *dquot)
6607{
4c376dca 6608 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
ac27a0ec
DK
6609}
6610
617ba13b 6611static int ext4_write_dquot(struct dquot *dquot)
ac27a0ec
DK
6612{
6613 int ret, err;
6614 handle_t *handle;
6615 struct inode *inode;
6616
6617 inode = dquot_to_inode(dquot);
9924a92a 6618 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
0b8e58a1 6619 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
ac27a0ec
DK
6620 if (IS_ERR(handle))
6621 return PTR_ERR(handle);
6622 ret = dquot_commit(dquot);
617ba13b 6623 err = ext4_journal_stop(handle);
ac27a0ec
DK
6624 if (!ret)
6625 ret = err;
6626 return ret;
6627}
6628
617ba13b 6629static int ext4_acquire_dquot(struct dquot *dquot)
ac27a0ec
DK
6630{
6631 int ret, err;
6632 handle_t *handle;
6633
9924a92a 6634 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
0b8e58a1 6635 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
ac27a0ec
DK
6636 if (IS_ERR(handle))
6637 return PTR_ERR(handle);
6638 ret = dquot_acquire(dquot);
617ba13b 6639 err = ext4_journal_stop(handle);
ac27a0ec
DK
6640 if (!ret)
6641 ret = err;
6642 return ret;
6643}
6644
617ba13b 6645static int ext4_release_dquot(struct dquot *dquot)
ac27a0ec
DK
6646{
6647 int ret, err;
6648 handle_t *handle;
6649
9924a92a 6650 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
0b8e58a1 6651 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
9c3013e9
JK
6652 if (IS_ERR(handle)) {
6653 /* Release dquot anyway to avoid endless cycle in dqput() */
6654 dquot_release(dquot);
ac27a0ec 6655 return PTR_ERR(handle);
9c3013e9 6656 }
ac27a0ec 6657 ret = dquot_release(dquot);
617ba13b 6658 err = ext4_journal_stop(handle);
ac27a0ec
DK
6659 if (!ret)
6660 ret = err;
6661 return ret;
6662}
6663
617ba13b 6664static int ext4_mark_dquot_dirty(struct dquot *dquot)
ac27a0ec 6665{
262b4662 6666 struct super_block *sb = dquot->dq_sb;
262b4662 6667
f177ee08 6668 if (ext4_is_quota_journalled(sb)) {
ac27a0ec 6669 dquot_mark_dquot_dirty(dquot);
617ba13b 6670 return ext4_write_dquot(dquot);
ac27a0ec
DK
6671 } else {
6672 return dquot_mark_dquot_dirty(dquot);
6673 }
6674}
6675
617ba13b 6676static int ext4_write_info(struct super_block *sb, int type)
ac27a0ec
DK
6677{
6678 int ret, err;
6679 handle_t *handle;
6680
6681 /* Data block + inode block */
2b0143b5 6682 handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
ac27a0ec
DK
6683 if (IS_ERR(handle))
6684 return PTR_ERR(handle);
6685 ret = dquot_commit_info(sb, type);
617ba13b 6686 err = ext4_journal_stop(handle);
ac27a0ec
DK
6687 if (!ret)
6688 ret = err;
6689 return ret;
6690}
6691
daf647d2
TT
6692static void lockdep_set_quota_inode(struct inode *inode, int subclass)
6693{
6694 struct ext4_inode_info *ei = EXT4_I(inode);
6695
6696 /* The first argument of lockdep_set_subclass has to be
6697 * *exactly* the same as the argument to init_rwsem() --- in
6698 * this case, in init_once() --- or lockdep gets unhappy
6699 * because the name of the lock is set using the
6700 * stringification of the argument to init_rwsem().
6701 */
6702 (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
6703 lockdep_set_subclass(&ei->i_data_sem, subclass);
6704}
6705
ac27a0ec
DK
6706/*
6707 * Standard function to be called on quota_on
6708 */
617ba13b 6709static int ext4_quota_on(struct super_block *sb, int type, int format_id,
8c54ca9c 6710 const struct path *path)
ac27a0ec
DK
6711{
6712 int err;
ac27a0ec
DK
6713
6714 if (!test_opt(sb, QUOTA))
6715 return -EINVAL;
0623543b 6716
ac27a0ec 6717 /* Quotafile not on the same filesystem? */
d8c9584e 6718 if (path->dentry->d_sb != sb)
ac27a0ec 6719 return -EXDEV;
e0770e91
JK
6720
6721 /* Quota already enabled for this file? */
6722 if (IS_NOQUOTA(d_inode(path->dentry)))
6723 return -EBUSY;
6724
0623543b
JK
6725 /* Journaling quota? */
6726 if (EXT4_SB(sb)->s_qf_names[type]) {
2b2d6d01 6727 /* Quotafile not in fs root? */
f00c9e44 6728 if (path->dentry->d_parent != sb->s_root)
b31e1552
ES
6729 ext4_msg(sb, KERN_WARNING,
6730 "Quota file not on filesystem root. "
6731 "Journaled quota will not work");
91389240
JK
6732 sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6733 } else {
6734 /*
6735 * Clear the flag just in case mount options changed since
6736 * last time.
6737 */
6738 sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
2b2d6d01 6739 }
0623543b
JK
6740
6741 /*
6742 * When we journal data on the quota file, we have to flush the journal to
6743 * see all updates to the file when we bypass the pagecache...
6744 */
0390131b 6745 if (EXT4_SB(sb)->s_journal &&
2b0143b5 6746 ext4_should_journal_data(d_inode(path->dentry))) {
0623543b
JK
6747 /*
6748 * We don't need to lock updates but journal_flush() could
6749 * otherwise be livelocked...
6750 */
6751 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
01d5d965 6752 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
0623543b 6753 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
f00c9e44 6754 if (err)
7ffe1ea8 6755 return err;
0623543b 6756 }
957153fc 6757
daf647d2
TT
6758 lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6759 err = dquot_quota_on(sb, type, format_id, path);
957153fc 6760 if (err) {
daf647d2
TT
6761 lockdep_set_quota_inode(path->dentry->d_inode,
6762 I_DATA_SEM_NORMAL);
957153fc
JK
6763 } else {
6764 struct inode *inode = d_inode(path->dentry);
6765 handle_t *handle;
6766
61a92987
JK
6767 /*
6768 * Set inode flags to prevent userspace from messing with quota
6769 * files. If this fails, we return success anyway since quotas
6770 * are already enabled and this is not a hard failure.
6771 */
957153fc
JK
6772 inode_lock(inode);
6773 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6774 if (IS_ERR(handle))
6775 goto unlock_inode;
6776 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
6777 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
6778 S_NOATIME | S_IMMUTABLE);
4209ae12 6779 err = ext4_mark_inode_dirty(handle, inode);
957153fc
JK
6780 ext4_journal_stop(handle);
6781 unlock_inode:
6782 inode_unlock(inode);
6783 }
daf647d2 6784 return err;
ac27a0ec
DK
6785}
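
ext4_quota_on() is reached from user space through quotactl(2) with Q_QUOTAON, which is what quotaon(8) issues for visible quota files (filesystems carrying the quota feature instead use the hidden quota inodes enabled by ext4_enable_quotas()). A minimal sketch under the assumption that /dev/sdb1 is mounted at /mnt with usrjquota=aquota.user,jqfmt=vfsv1 and that /mnt/aquota.user exists; all of those names are placeholders:

/* Minimal sketch: turn journaled user quota on and off via quotactl(2).
 * Assumes /dev/sdb1, /mnt and /mnt/aquota.user as described above; needs
 * CAP_SYS_ADMIN. QFMT_VFS_V1 matches the jqfmt=vfsv1 mount option.
 */
#include <stdio.h>
#include <sys/quota.h>
#include <sys/types.h>

#ifndef QFMT_VFS_V1
#define QFMT_VFS_V1 4                   /* value from <linux/quota.h> */
#endif

int main(void)
{
        char qfile[] = "/mnt/aquota.user";

        if (quotactl(QCMD(Q_QUOTAON, USRQUOTA), "/dev/sdb1",
                     QFMT_VFS_V1, qfile) < 0)
                perror("Q_QUOTAON");

        /* ... quota accounting active; ext4_quota_on() has marked the
         * quota file NOATIME and IMMUTABLE ... */

        if (quotactl(QCMD(Q_QUOTAOFF, USRQUOTA), "/dev/sdb1", 0, NULL) < 0)
                perror("Q_QUOTAOFF");
        return 0;
}
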
6786
7c319d32
AK
6787static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
6788 unsigned int flags)
6789{
6790 int err;
6791 struct inode *qf_inode;
a2d4a646 6792 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7c319d32 6793 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
689c958c
LX
6794 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6795 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7c319d32
AK
6796 };
6797
e2b911c5 6798 BUG_ON(!ext4_has_feature_quota(sb));
7c319d32
AK
6799
6800 if (!qf_inums[type])
6801 return -EPERM;
6802
8a363970 6803 qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7c319d32
AK
6804 if (IS_ERR(qf_inode)) {
6805 ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
6806 return PTR_ERR(qf_inode);
6807 }
6808
bcb13850
JK
6809 /* Don't account quota for quota files to avoid recursion */
6810 qf_inode->i_flags |= S_NOQUOTA;
daf647d2 6811 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7212b95e 6812 err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
daf647d2
TT
6813 if (err)
6814 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
61157b24 6815 iput(qf_inode);
7c319d32
AK
6816
6817 return err;
6818}
6819
6820/* Enable usage tracking for all quota types. */
25c6d98f 6821int ext4_enable_quotas(struct super_block *sb)
7c319d32
AK
6822{
6823 int type, err = 0;
a2d4a646 6824 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7c319d32 6825 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
689c958c
LX
6826 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
6827 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7c319d32 6828 };
49da9392
JK
6829 bool quota_mopt[EXT4_MAXQUOTAS] = {
6830 test_opt(sb, USRQUOTA),
6831 test_opt(sb, GRPQUOTA),
6832 test_opt(sb, PRJQUOTA),
6833 };
7c319d32 6834
91389240 6835 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
a2d4a646 6836 for (type = 0; type < EXT4_MAXQUOTAS; type++) {
7c319d32
AK
6837 if (qf_inums[type]) {
6838 err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
49da9392
JK
6839 DQUOT_USAGE_ENABLED |
6840 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
7c319d32
AK
6841 if (err) {
6842 ext4_warning(sb,
72ba7450
TT
6843 "Failed to enable quota tracking "
6844 "(type=%d, err=%d). Please run "
6845 "e2fsck to fix.", type, err);
7f144fd0
JU
6846 for (type--; type >= 0; type--)
6847 dquot_quota_off(sb, type);
6848
7c319d32
AK
6849 return err;
6850 }
6851 }
6852 }
6853 return 0;
6854}
6855
ca0e05e4
DM
6856static int ext4_quota_off(struct super_block *sb, int type)
6857{
21f97697
JK
6858 struct inode *inode = sb_dqopt(sb)->files[type];
6859 handle_t *handle;
957153fc 6860 int err;
21f97697 6861
87009d86
DM
6862 /* Force all delayed allocation blocks to be allocated.
6863 * Caller already holds s_umount sem */
6864 if (test_opt(sb, DELALLOC))
ca0e05e4 6865 sync_filesystem(sb);
ca0e05e4 6866
957153fc 6867 if (!inode || !igrab(inode))
0b268590
AG
6868 goto out;
6869
957153fc 6870 err = dquot_quota_off(sb, type);
964edf66 6871 if (err || ext4_has_feature_quota(sb))
957153fc
JK
6872 goto out_put;
6873
6874 inode_lock(inode);
61a92987
JK
6875 /*
6876 * Update modification times of quota files when userspace can
6877 * start looking at them. If we fail, we return success anyway since
6878 * this is not a hard failure and quotas are already disabled.
6879 */
9924a92a 6880 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
4209ae12
HS
6881 if (IS_ERR(handle)) {
6882 err = PTR_ERR(handle);
957153fc 6883 goto out_unlock;
4209ae12 6884 }
957153fc
JK
6885 EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
6886 inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
eeca7ea1 6887 inode->i_mtime = inode->i_ctime = current_time(inode);
4209ae12 6888 err = ext4_mark_inode_dirty(handle, inode);
21f97697 6889 ext4_journal_stop(handle);
957153fc
JK
6890out_unlock:
6891 inode_unlock(inode);
6892out_put:
964edf66 6893 lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
957153fc
JK
6894 iput(inode);
6895 return err;
21f97697 6896out:
ca0e05e4
DM
6897 return dquot_quota_off(sb, type);
6898}
6899
ac27a0ec
DK
6900/* Read data from quotafile - avoid pagecache and such because we cannot afford
6901 * acquiring the locks... As quota files are never truncated and quota code
25985edc 6902 * itself serializes the operations (and no one else should touch the files)
ac27a0ec 6903 * we don't have to be afraid of races */
617ba13b 6904static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
ac27a0ec
DK
6905 size_t len, loff_t off)
6906{
6907 struct inode *inode = sb_dqopt(sb)->files[type];
725d26d3 6908 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
ac27a0ec
DK
6909 int offset = off & (sb->s_blocksize - 1);
6910 int tocopy;
6911 size_t toread;
6912 struct buffer_head *bh;
6913 loff_t i_size = i_size_read(inode);
6914
6915 if (off > i_size)
6916 return 0;
6917 if (off+len > i_size)
6918 len = i_size-off;
6919 toread = len;
6920 while (toread > 0) {
6921 tocopy = sb->s_blocksize - offset < toread ?
6922 sb->s_blocksize - offset : toread;
1c215028
TT
6923 bh = ext4_bread(NULL, inode, blk, 0);
6924 if (IS_ERR(bh))
6925 return PTR_ERR(bh);
ac27a0ec
DK
6926 if (!bh) /* A hole? */
6927 memset(data, 0, tocopy);
6928 else
6929 memcpy(data, bh->b_data+offset, tocopy);
6930 brelse(bh);
6931 offset = 0;
6932 toread -= tocopy;
6933 data += tocopy;
6934 blk++;
6935 }
6936 return len;
6937}
6938
6939/* Write to quotafile (we know the transaction is already started and has
6940 * enough credits) */
617ba13b 6941static ssize_t ext4_quota_write(struct super_block *sb, int type,
ac27a0ec
DK
6942 const char *data, size_t len, loff_t off)
6943{
6944 struct inode *inode = sb_dqopt(sb)->files[type];
725d26d3 6945 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
4209ae12 6946 int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
c5e298ae 6947 int retries = 0;
ac27a0ec
DK
6948 struct buffer_head *bh;
6949 handle_t *handle = journal_current_handle();
6950
0390131b 6951 if (EXT4_SB(sb)->s_journal && !handle) {
b31e1552
ES
6952 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6953 " cancelled because transaction is not started",
9c3013e9
JK
6954 (unsigned long long)off, (unsigned long long)len);
6955 return -EIO;
6956 }
67eeb568
DM
6957 /*
6958 * Since we account for only one data block in the transaction credits,
6959 * it is impossible to cross a block boundary.
6960 */
6961 if (sb->s_blocksize - offset < len) {
6962 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
6963 " cancelled because not block aligned",
6964 (unsigned long long)off, (unsigned long long)len);
6965 return -EIO;
6966 }
6967
c5e298ae
TT
6968 do {
6969 bh = ext4_bread(handle, inode, blk,
6970 EXT4_GET_BLOCKS_CREATE |
6971 EXT4_GET_BLOCKS_METADATA_NOFAIL);
45586c70 6972 } while (PTR_ERR(bh) == -ENOSPC &&
c5e298ae 6973 ext4_should_retry_alloc(inode->i_sb, &retries));
1c215028
TT
6974 if (IS_ERR(bh))
6975 return PTR_ERR(bh);
67eeb568
DM
6976 if (!bh)
6977 goto out;
5d601255 6978 BUFFER_TRACE(bh, "get write access");
188c299e 6979 err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
62d2b5f2
JK
6980 if (err) {
6981 brelse(bh);
1c215028 6982 return err;
ac27a0ec 6983 }
67eeb568
DM
6984 lock_buffer(bh);
6985 memcpy(bh->b_data+offset, data, len);
6986 flush_dcache_page(bh->b_page);
6987 unlock_buffer(bh);
62d2b5f2 6988 err = ext4_handle_dirty_metadata(handle, NULL, bh);
67eeb568 6989 brelse(bh);
ac27a0ec 6990out:
67eeb568
DM
6991 if (inode->i_size < off + len) {
6992 i_size_write(inode, off + len);
617ba13b 6993 EXT4_I(inode)->i_disksize = inode->i_size;
4209ae12
HS
6994 err2 = ext4_mark_inode_dirty(handle, inode);
6995 if (unlikely(err2 && !err))
6996 err = err2;
ac27a0ec 6997 }
4209ae12 6998 return err ? err : len;
ac27a0ec 6999}
ac27a0ec
DK
7000#endif
7001
152a0836
AV
7002static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
7003 const char *dev_name, void *data)
ac27a0ec 7004{
152a0836 7005 return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
ac27a0ec
DK
7006}
7007
c290ea01 7008#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
24b58424
TT
7009static inline void register_as_ext2(void)
7010{
7011 int err = register_filesystem(&ext2_fs_type);
7012 if (err)
7013 printk(KERN_WARNING
7014 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
7015}
7016
7017static inline void unregister_as_ext2(void)
7018{
7019 unregister_filesystem(&ext2_fs_type);
7020}
2035e776
TT
7021
7022static inline int ext2_feature_set_ok(struct super_block *sb)
7023{
e2b911c5 7024 if (ext4_has_unknown_ext2_incompat_features(sb))
2035e776 7025 return 0;
bc98a42c 7026 if (sb_rdonly(sb))
2035e776 7027 return 1;
e2b911c5 7028 if (ext4_has_unknown_ext2_ro_compat_features(sb))
2035e776
TT
7029 return 0;
7030 return 1;
7031}
24b58424
TT
7032#else
7033static inline void register_as_ext2(void) { }
7034static inline void unregister_as_ext2(void) { }
2035e776 7035static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
24b58424
TT
7036#endif
7037
24b58424
TT
7038static inline void register_as_ext3(void)
7039{
7040 int err = register_filesystem(&ext3_fs_type);
7041 if (err)
7042 printk(KERN_WARNING
7043 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7044}
7045
7046static inline void unregister_as_ext3(void)
7047{
7048 unregister_filesystem(&ext3_fs_type);
7049}
2035e776
TT
7050
7051static inline int ext3_feature_set_ok(struct super_block *sb)
7052{
e2b911c5 7053 if (ext4_has_unknown_ext3_incompat_features(sb))
2035e776 7054 return 0;
e2b911c5 7055 if (!ext4_has_feature_journal(sb))
2035e776 7056 return 0;
bc98a42c 7057 if (sb_rdonly(sb))
2035e776 7058 return 1;
e2b911c5 7059 if (ext4_has_unknown_ext3_ro_compat_features(sb))
2035e776
TT
7060 return 0;
7061 return 1;
7062}
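
These helpers exist because, when CONFIG_EXT4_USE_FOR_EXT2 is set and the separate ext2/ext3 drivers are not built, the ext4 module registers itself for those filesystem types, and the feature checks above decide whether a given superblock is safe to service. From user space such a mount looks like any other explicit ext2/ext3 mount; a small sketch with /dev/sdb1 and /mnt as placeholder names:

/* Minimal sketch: an explicit "ext3" mount request. When the ext4 module is
 * registered as ext3 (register_as_ext3() above) and ext3_feature_set_ok()
 * accepts the superblock, ext4_mount()/ext4_fill_super() service the mount.
 * /dev/sdb1 and /mnt are placeholder names; needs CAP_SYS_ADMIN.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        if (mount("/dev/sdb1", "/mnt", "ext3", MS_RDONLY, "") < 0) {
                perror("mount ext3");
                return 1;
        }
        return 0;
}
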
24b58424 7063
03010a33
TT
7064static struct file_system_type ext4_fs_type = {
7065 .owner = THIS_MODULE,
7066 .name = "ext4",
152a0836 7067 .mount = ext4_mount,
03010a33 7068 .kill_sb = kill_block_super,
14f3db55 7069 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
03010a33 7070};
7f78e035 7071MODULE_ALIAS_FS("ext4");
03010a33 7072
e9e3bcec
ES
7073/* Shared across all ext4 file systems */
7074wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
e9e3bcec 7075
5dabfc78 7076static int __init ext4_init_fs(void)
ac27a0ec 7077{
e9e3bcec 7078 int i, err;
c9de560d 7079
e294a537 7080 ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
07c0c5d8 7081 ext4_li_info = NULL;
07c0c5d8 7082
9a4c8019 7083 /* Build-time check for flags consistency */
12e9b892 7084 ext4_check_flag_values();
e9e3bcec 7085
e142d052 7086 for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
e9e3bcec 7087 init_waitqueue_head(&ext4__ioend_wq[i]);
e9e3bcec 7088
51865fda 7089 err = ext4_init_es();
6fd058f7
TT
7090 if (err)
7091 return err;
51865fda 7092
1dc0aa46 7093 err = ext4_init_pending();
22cfe4b4
EB
7094 if (err)
7095 goto out7;
7096
7097 err = ext4_init_post_read_processing();
1dc0aa46
EW
7098 if (err)
7099 goto out6;
7100
51865fda
ZL
7101 err = ext4_init_pageio();
7102 if (err)
b5799018 7103 goto out5;
51865fda 7104
5dabfc78 7105 err = ext4_init_system_zone();
bd2d0210 7106 if (err)
b5799018 7107 goto out4;
857ac889 7108
b5799018 7109 err = ext4_init_sysfs();
dd68314c 7110 if (err)
b5799018 7111 goto out3;
857ac889 7112
5dabfc78 7113 err = ext4_init_mballoc();
c9de560d
AT
7114 if (err)
7115 goto out2;
ac27a0ec
DK
7116 err = init_inodecache();
7117 if (err)
7118 goto out1;
aa75f4d3
HS
7119
7120 err = ext4_fc_init_dentry_cache();
7121 if (err)
7122 goto out05;
7123
24b58424 7124 register_as_ext3();
2035e776 7125 register_as_ext2();
03010a33 7126 err = register_filesystem(&ext4_fs_type);
ac27a0ec
DK
7127 if (err)
7128 goto out;
bfff6873 7129
ac27a0ec
DK
7130 return 0;
7131out:
24b58424
TT
7132 unregister_as_ext2();
7133 unregister_as_ext3();
aa75f4d3 7134out05:
ac27a0ec
DK
7135 destroy_inodecache();
7136out1:
5dabfc78 7137 ext4_exit_mballoc();
9c191f70 7138out2:
b5799018
TT
7139 ext4_exit_sysfs();
7140out3:
5dabfc78 7141 ext4_exit_system_zone();
b5799018 7142out4:
5dabfc78 7143 ext4_exit_pageio();
b5799018 7144out5:
22cfe4b4 7145 ext4_exit_post_read_processing();
1dc0aa46 7146out6:
22cfe4b4
EB
7147 ext4_exit_pending();
7148out7:
51865fda
ZL
7149 ext4_exit_es();
7150
ac27a0ec
DK
7151 return err;
7152}
7153
5dabfc78 7154static void __exit ext4_exit_fs(void)
ac27a0ec 7155{
bfff6873 7156 ext4_destroy_lazyinit_thread();
24b58424
TT
7157 unregister_as_ext2();
7158 unregister_as_ext3();
03010a33 7159 unregister_filesystem(&ext4_fs_type);
ac27a0ec 7160 destroy_inodecache();
5dabfc78 7161 ext4_exit_mballoc();
b5799018 7162 ext4_exit_sysfs();
5dabfc78
TT
7163 ext4_exit_system_zone();
7164 ext4_exit_pageio();
22cfe4b4 7165 ext4_exit_post_read_processing();
dd12ed14 7166 ext4_exit_es();
1dc0aa46 7167 ext4_exit_pending();
ac27a0ec
DK
7168}
7169
7170MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
83982b6f 7171MODULE_DESCRIPTION("Fourth Extended Filesystem");
ac27a0ec 7172MODULE_LICENSE("GPL");
7ef79ad5 7173MODULE_SOFTDEP("pre: crc32c");
5dabfc78
TT
7174module_init(ext4_init_fs)
7175module_exit(ext4_exit_fs)