Merge tag 'probes-fixes-v6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / fs / ext4 / balloc.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
ac27a0ec 2/*
617ba13b 3 * linux/fs/ext4/balloc.c
ac27a0ec
DK
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 *
10 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
11 * Big-endian to little-endian byte-swapping/bitmaps by
12 * David S. Miller (davem@caip.rutgers.edu), 1995
13 */
14
15#include <linux/time.h>
16#include <linux/capability.h>
17#include <linux/fs.h>
ac27a0ec
DK
18#include <linux/quotaops.h>
19#include <linux/buffer_head.h>
3dcf5451
CH
20#include "ext4.h"
21#include "ext4_jbd2.h"
e21675d4 22#include "mballoc.h"
3dcf5451 23
0562e0ba
JZ
24#include <trace/events/ext4.h>
25
5f163cc7
ES
26static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
27 ext4_group_t block_group);
ac27a0ec
DK
28/*
29 * balloc.c contains the blocks allocation and deallocation routines
30 */
31
bd86298e
LC
32/*
33 * Calculate block group number for a given block number
34 */
35ext4_group_t ext4_get_group_number(struct super_block *sb,
36 ext4_fsblk_t block)
37{
38 ext4_group_t group;
39
40 if (test_opt2(sb, STD_GROUP_SIZE))
960fd856
TT
41 group = (block -
42 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
bd86298e
LC
43 (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
44 else
45 ext4_get_group_no_and_offset(sb, block, &group, NULL);
46 return group;
47}
48
72b64b59 49/*
3212a80a
TT
50 * Calculate the block group number and offset into the block/cluster
51 * allocation bitmap, given a block number
72b64b59
AM
52 */
53void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
fd2d4291 54 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
72b64b59 55{
8c55e204 56 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
72b64b59
AM
57 ext4_grpblk_t offset;
58
8c55e204 59 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
3212a80a
TT
60 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
61 EXT4_SB(sb)->s_cluster_bits;
72b64b59
AM
62 if (offsetp)
63 *offsetp = offset;
64 if (blockgrpp)
8c55e204 65 *blockgrpp = blocknr;
72b64b59
AM
66
67}
68
68911009
LC
69/*
70 * Check whether the 'block' lives within the 'block_group'. Returns 1 if so
71 * and 0 otherwise.
72 */
73static inline int ext4_block_in_group(struct super_block *sb,
74 ext4_fsblk_t block,
75 ext4_group_t block_group)
0bf7e837
JS
76{
77 ext4_group_t actual_group;
68911009 78
bd86298e 79 actual_group = ext4_get_group_number(sb, block);
68911009 80 return (actual_group == block_group) ? 1 : 0;
0bf7e837
JS
81}
82
68e294dc
KS
83/*
84 * Return the number of clusters used for file system metadata; this
d5b8f310
TT
85 * represents the overhead needed by the file system.
86 */
c197855e
SH
87static unsigned ext4_num_overhead_clusters(struct super_block *sb,
88 ext4_group_t block_group,
89 struct ext4_group_desc *gdp)
0bf7e837 90{
68e294dc
KS
91 unsigned base_clusters, num_clusters;
92 int block_cluster = -1, inode_cluster;
93 int itbl_cluster_start = -1, itbl_cluster_end = -1;
d5b8f310 94 ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
68e294dc
KS
95 ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
96 ext4_fsblk_t itbl_blk_start, itbl_blk_end;
0bf7e837 97 struct ext4_sb_info *sbi = EXT4_SB(sb);
0bf7e837 98
d5b8f310
TT
99 /* This is the number of clusters used by the superblock,
100 * block group descriptors, and reserved block group
101 * descriptor blocks */
68e294dc
KS
102 base_clusters = ext4_num_base_meta_clusters(sb, block_group);
103 num_clusters = base_clusters;
104
105 /*
106 * Account and record inode table clusters if any cluster
107 * is in the block group, or inode table cluster range is
108 * [-1, -1] and won't overlap with block/inode bitmap cluster
109 * accounted below.
110 */
111 itbl_blk_start = ext4_inode_table(sb, gdp);
112 itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
113 if (itbl_blk_start <= end && itbl_blk_end >= start) {
114 itbl_blk_start = itbl_blk_start >= start ?
115 itbl_blk_start : start;
116 itbl_blk_end = itbl_blk_end <= end ?
117 itbl_blk_end : end;
118
119 itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
120 itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);
121
122 num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
123 /* check if border cluster is overlapped */
124 if (itbl_cluster_start == base_clusters - 1)
125 num_clusters--;
126 }
d5b8f310
TT
127
128 /*
68e294dc
KS
129 * For the allocation bitmaps, we first need to check to see
130 * if the block is in the block group. If it is, then check
131 * to see if the cluster is already accounted for in the clusters
132 * used for the base metadata cluster and inode tables cluster.
d5b8f310
TT
133 * Normally all of these blocks are contiguous, so the special
134 * case handling shouldn't be necessary except for *very*
135 * unusual file system layouts.
136 */
137 if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
b0dd6b70
TT
138 block_cluster = EXT4_B2C(sbi,
139 ext4_block_bitmap(sb, gdp) - start);
68e294dc
KS
140 if (block_cluster >= base_clusters &&
141 (block_cluster < itbl_cluster_start ||
142 block_cluster > itbl_cluster_end))
d5b8f310 143 num_clusters++;
d5b8f310
TT
144 }
145
146 if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
147 inode_cluster = EXT4_B2C(sbi,
b0dd6b70 148 ext4_inode_bitmap(sb, gdp) - start);
68e294dc
KS
149 /*
150 * Additional check if inode bitmap is in just accounted
151 * block_cluster
152 */
153 if (inode_cluster != block_cluster &&
154 inode_cluster >= base_clusters &&
155 (inode_cluster < itbl_cluster_start ||
156 inode_cluster > itbl_cluster_end))
d5b8f310 157 num_clusters++;
0bf7e837 158 }
d5b8f310 159
d5b8f310 160 return num_clusters;
0bf7e837 161}
c2ea3fde 162
d5b8f310
TT
163static unsigned int num_clusters_in_group(struct super_block *sb,
164 ext4_group_t block_group)
49f7f9af 165{
d5b8f310
TT
166 unsigned int blocks;
167
49f7f9af
TT
168 if (block_group == ext4_get_groups_count(sb) - 1) {
169 /*
170 * Even though mke2fs always initializes the first and
171 * last group, just in case some other tool was used,
172 * we need to make sure we calculate the right free
173 * blocks.
174 */
d5b8f310 175 blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
49f7f9af
TT
176 ext4_group_first_block_no(sb, block_group);
177 } else
d5b8f310
TT
178 blocks = EXT4_BLOCKS_PER_GROUP(sb);
179 return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
49f7f9af
TT
180}
181
fd034a84 182/* Initializes an uninitialized block bitmap */
aef4885a 183static int ext4_init_block_bitmap(struct super_block *sb,
c197855e
SH
184 struct buffer_head *bh,
185 ext4_group_t block_group,
186 struct ext4_group_desc *gdp)
717d50e4 187{
d5b8f310 188 unsigned int bit, bit_max;
717d50e4 189 struct ext4_sb_info *sbi = EXT4_SB(sb);
fd034a84 190 ext4_fsblk_t start, tmp;
fd034a84 191
837c23fb 192 ASSERT(buffer_locked(bh));
fd034a84 193
feb0ab32 194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
db79e6d1
WS
195 ext4_mark_group_bitmap_corrupted(sb, block_group,
196 EXT4_GROUP_INFO_BBITMAP_CORRUPT |
197 EXT4_GROUP_INFO_IBITMAP_CORRUPT);
6a797d27 198 return -EFSBADCRC;
717d50e4 199 }
fd034a84 200 memset(bh->b_data, 0, sb->s_blocksize);
717d50e4 201
d5b8f310 202 bit_max = ext4_num_base_meta_clusters(sb, block_group);
5b9554dc
TT
203 if ((bit_max >> 3) >= bh->b_size)
204 return -EFSCORRUPTED;
205
fd034a84
TT
206 for (bit = 0; bit < bit_max; bit++)
207 ext4_set_bit(bit, bh->b_data);
d00a6d7b 208
fd034a84 209 start = ext4_group_first_block_no(sb, block_group);
717d50e4 210
fd034a84
TT
211 /* Set bits for block and inode bitmaps, and inode table */
212 tmp = ext4_block_bitmap(sb, gdp);
819b23f1 213 if (ext4_block_in_group(sb, tmp, block_group))
d5b8f310 214 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
717d50e4 215
fd034a84 216 tmp = ext4_inode_bitmap(sb, gdp);
819b23f1 217 if (ext4_block_in_group(sb, tmp, block_group))
d5b8f310 218 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
0bf7e837 219
fd034a84
TT
220 tmp = ext4_inode_table(sb, gdp);
221 for (; tmp < ext4_inode_table(sb, gdp) +
222 sbi->s_itb_per_group; tmp++) {
819b23f1 223 if (ext4_block_in_group(sb, tmp, block_group))
d5b8f310 224 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
717d50e4 225 }
d5b8f310 226
fd034a84
TT
227 /*
228 * Also if the number of blocks within the group is less than
229 * the blocksize * 8 ( which is the size of bitmap ), set rest
230 * of the block bitmap to 1
231 */
d5b8f310 232 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
fd034a84 233 sb->s_blocksize * 8, bh->b_data);
aef4885a 234 return 0;
717d50e4
AD
235}
236
fd034a84
TT
237/* Return the number of free blocks in a block group. It is used when
238 * the block bitmap is uninitialized, so we can't just count the bits
239 * in the bitmap. */
cff1dfd7
TT
240unsigned ext4_free_clusters_after_init(struct super_block *sb,
241 ext4_group_t block_group,
242 struct ext4_group_desc *gdp)
fd034a84 243{
666245d9 244 return num_clusters_in_group(sb, block_group) -
d5b8f310 245 ext4_num_overhead_clusters(sb, block_group, gdp);
fd034a84 246}
717d50e4 247
ac27a0ec
DK
248/*
249 * The free blocks are managed by bitmaps. A file system contains several
250 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
251 * block for inodes, N blocks for the inode table and data blocks.
252 *
253 * The file system contains group descriptors which are located after the
254 * super block. Each descriptor contains the number of the bitmap block and
255 * the free blocks count in the block. The descriptors are loaded in memory
e627432c 256 * when a file system is mounted (see ext4_fill_super).
ac27a0ec
DK
257 */
258
ac27a0ec 259/**
617ba13b 260 * ext4_get_group_desc() -- load group descriptor from disk
ac27a0ec
DK
261 * @sb: super block
262 * @block_group: given block group
263 * @bh: pointer to the buffer head to store the block
264 * group descriptor
265 */
af5bc92d 266struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
fd2d4291 267 ext4_group_t block_group,
af5bc92d 268 struct buffer_head **bh)
ac27a0ec 269{
498e5f24
TT
270 unsigned int group_desc;
271 unsigned int offset;
8df9675f 272 ext4_group_t ngroups = ext4_get_groups_count(sb);
af5bc92d 273 struct ext4_group_desc *desc;
617ba13b 274 struct ext4_sb_info *sbi = EXT4_SB(sb);
1d0c3924 275 struct buffer_head *bh_p;
ac27a0ec 276
8df9675f 277 if (block_group >= ngroups) {
12062ddd
ES
278 ext4_error(sb, "block_group >= groups_count - block_group = %u,"
279 " groups_count = %u", block_group, ngroups);
ac27a0ec
DK
280
281 return NULL;
282 }
ac27a0ec 283
617ba13b
MC
284 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
285 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
1d0c3924
TT
286 bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
287 /*
288 * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
289 * the pointer being dereferenced won't be dereferenced again. By
290 * looking at the usage in add_new_gdb() the value isn't modified,
291 * just the pointer, and so it remains valid.
292 */
293 if (!bh_p) {
12062ddd 294 ext4_error(sb, "Group descriptor not loaded - "
498e5f24 295 "block_group = %u, group_desc = %u, desc = %u",
af5bc92d 296 block_group, group_desc, offset);
ac27a0ec
DK
297 return NULL;
298 }
299
0d1ee42f 300 desc = (struct ext4_group_desc *)(
1d0c3924 301 (__u8 *)bh_p->b_data +
0d1ee42f 302 offset * EXT4_DESC_SIZE(sb));
ac27a0ec 303 if (bh)
1d0c3924 304 *bh = bh_p;
0d1ee42f 305 return desc;
ac27a0ec
DK
306}
307
fa08a7b6
YB
308static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
309 ext4_group_t block_group,
310 struct buffer_head *bh)
311{
312 ext4_grpblk_t next_zero_bit;
313 unsigned long bitmap_size = sb->s_blocksize * 8;
314 unsigned int offset = num_clusters_in_group(sb, block_group);
315
316 if (bitmap_size <= offset)
317 return 0;
318
319 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
320
321 return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
322}
323
5354b2af
TT
324struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
325 ext4_group_t group)
326{
327 struct ext4_group_info **grp_info;
328 long indexv, indexh;
329
330 if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) {
331 ext4_error(sb, "invalid group %u", group);
332 return NULL;
333 }
334 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
335 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
336 grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
337 return grp_info[indexh];
338}
339
7a4c5de2
TT
340/*
341 * Return the block number which was discovered to be invalid, or 0 if
342 * the block bitmap is valid.
343 */
344static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
345 struct ext4_group_desc *desc,
dbde0abe 346 ext4_group_t block_group,
7a4c5de2 347 struct buffer_head *bh)
abcb2947 348{
e674e5cb 349 struct ext4_sb_info *sbi = EXT4_SB(sb);
abcb2947
AK
350 ext4_grpblk_t offset;
351 ext4_grpblk_t next_zero_bit;
22be37ac 352 ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb);
7a4c5de2 353 ext4_fsblk_t blk;
abcb2947
AK
354 ext4_fsblk_t group_first_block;
355
e2b911c5 356 if (ext4_has_feature_flex_bg(sb)) {
abcb2947
AK
357 /* with FLEX_BG, the inode/block bitmaps and itable
358 * blocks may not be in the group at all
359 * so the bitmap validation will be skipped for those groups
360 * or it has to also read the block group where the bitmaps
361 * are located to verify they are set.
362 */
7a4c5de2 363 return 0;
abcb2947
AK
364 }
365 group_first_block = ext4_group_first_block_no(sb, block_group);
366
367 /* check whether block bitmap block number is set */
7a4c5de2
TT
368 blk = ext4_block_bitmap(sb, desc);
369 offset = blk - group_first_block;
22be37ac 370 if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
7dac4a17 371 !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
abcb2947 372 /* bad block bitmap */
7a4c5de2 373 return blk;
abcb2947
AK
374
375 /* check whether the inode bitmap block number is set */
7a4c5de2
TT
376 blk = ext4_inode_bitmap(sb, desc);
377 offset = blk - group_first_block;
22be37ac 378 if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
7dac4a17 379 !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
abcb2947 380 /* bad block bitmap */
7a4c5de2 381 return blk;
abcb2947
AK
382
383 /* check whether the inode table block number is set */
7a4c5de2
TT
384 blk = ext4_inode_table(sb, desc);
385 offset = blk - group_first_block;
22be37ac 386 if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
3d61ef10 387 EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
7dac4a17 388 return blk;
abcb2947 389 next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
3d61ef10 390 EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
e674e5cb
DW
391 EXT4_B2C(sbi, offset));
392 if (next_zero_bit <
3d61ef10 393 EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
7a4c5de2
TT
394 /* bad bitmap for inode tables */
395 return blk;
abcb2947
AK
396 return 0;
397}
fa77dcfa 398
9008a58e
DW
399static int ext4_validate_block_bitmap(struct super_block *sb,
400 struct ext4_group_desc *desc,
401 ext4_group_t block_group,
402 struct buffer_head *bh)
fa77dcfa 403{
7a4c5de2 404 ext4_fsblk_t blk;
8016e29f
HS
405 struct ext4_group_info *grp;
406
407 if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
408 return 0;
409
410 grp = ext4_get_group_info(sb, block_group);
7a4c5de2 411
9008a58e
DW
412 if (buffer_verified(bh))
413 return 0;
5354b2af 414 if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
9008a58e 415 return -EFSCORRUPTED;
fa77dcfa
DW
416
417 ext4_lock_group(sb, block_group);
8d5a803c
TT
418 if (buffer_verified(bh))
419 goto verified;
82483dfe 420 if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
46f870d6 421 ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
7a4c5de2 422 ext4_unlock_group(sb, block_group);
9008a58e 423 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
db79e6d1
WS
424 ext4_mark_group_bitmap_corrupted(sb, block_group,
425 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
9008a58e 426 return -EFSBADCRC;
7a4c5de2 427 }
9008a58e
DW
428 blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
429 if (unlikely(blk != 0)) {
7a4c5de2 430 ext4_unlock_group(sb, block_group);
9008a58e
DW
431 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
432 block_group, blk);
db79e6d1
WS
433 ext4_mark_group_bitmap_corrupted(sb, block_group,
434 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
9008a58e 435 return -EFSCORRUPTED;
7a4c5de2 436 }
fa08a7b6
YB
437 blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
438 if (unlikely(blk != 0)) {
439 ext4_unlock_group(sb, block_group);
440 ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
441 block_group, blk);
442 ext4_mark_group_bitmap_corrupted(sb, block_group,
443 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
444 return -EFSCORRUPTED;
445 }
7a4c5de2 446 set_buffer_verified(bh);
8d5a803c 447verified:
fa77dcfa 448 ext4_unlock_group(sb, block_group);
9008a58e 449 return 0;
fa77dcfa
DW
450}
451
ac27a0ec 452/**
15b49132 453 * ext4_read_block_bitmap_nowait()
ac27a0ec
DK
454 * @sb: super block
455 * @block_group: given block group
919adbfe 456 * @ignore_locked: ignore locked buffers
ac27a0ec 457 *
abcb2947
AK
458 * Read the bitmap for a given block_group,and validate the
459 * bits for block/inode/inode tables are set in the bitmaps
ac27a0ec 460 *
9033783c 461 * Return buffer_head on success or an ERR_PTR in case of failure.
ac27a0ec 462 */
717d50e4 463struct buffer_head *
cfd73237
AZ
464ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
465 bool ignore_locked)
ac27a0ec 466{
af5bc92d 467 struct ext4_group_desc *desc;
7dac4a17 468 struct ext4_sb_info *sbi = EXT4_SB(sb);
813e5727 469 struct buffer_head *bh;
7c9e69fa 470 ext4_fsblk_t bitmap_blk;
9008a58e 471 int err;
ac27a0ec 472
717d50e4 473 desc = ext4_get_group_desc(sb, block_group, NULL);
ac27a0ec 474 if (!desc)
9008a58e 475 return ERR_PTR(-EFSCORRUPTED);
7c9e69fa 476 bitmap_blk = ext4_block_bitmap(sb, desc);
7dac4a17
TT
477 if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
478 (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
479 ext4_error(sb, "Invalid block bitmap block %llu in "
480 "block_group %u", bitmap_blk, block_group);
736dedbb
WS
481 ext4_mark_group_bitmap_corrupted(sb, block_group,
482 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
7dac4a17
TT
483 return ERR_PTR(-EFSCORRUPTED);
484 }
abcb2947
AK
485 bh = sb_getblk(sb, bitmap_blk);
486 if (unlikely(!bh)) {
5ef2a699
WS
487 ext4_warning(sb, "Cannot get buffer for block bitmap - "
488 "block_group = %u, block_bitmap = %llu",
489 block_group, bitmap_blk);
9008a58e 490 return ERR_PTR(-ENOMEM);
abcb2947 491 }
2ccb5fb9 492
cfd73237
AZ
493 if (ignore_locked && buffer_locked(bh)) {
494 /* buffer under IO already, return if called for prefetching */
495 put_bh(bh);
496 return NULL;
497 }
498
2ccb5fb9 499 if (bitmap_uptodate(bh))
fa77dcfa 500 goto verify;
abcb2947 501
c806e68f 502 lock_buffer(bh);
2ccb5fb9
AK
503 if (bitmap_uptodate(bh)) {
504 unlock_buffer(bh);
fa77dcfa 505 goto verify;
2ccb5fb9 506 }
955ce5f5 507 ext4_lock_group(sb, block_group);
8844618d
TT
508 if (ext4_has_group_desc_csum(sb) &&
509 (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
510 if (block_group == 0) {
511 ext4_unlock_group(sb, block_group);
512 unlock_buffer(bh);
513 ext4_error(sb, "Block bitmap for bg 0 marked "
514 "uninitialized");
515 err = -EFSCORRUPTED;
516 goto out;
517 }
aef4885a 518 err = ext4_init_block_bitmap(sb, bh, block_group, desc);
05145bd7 519 if (err) {
b5aa06bf
KS
520 ext4_unlock_group(sb, block_group);
521 unlock_buffer(bh);
05145bd7
JK
522 ext4_error(sb, "Failed to init block bitmap for group "
523 "%u: %d", block_group, err);
9008a58e 524 goto out;
05145bd7 525 }
b5aa06bf
KS
526 set_bitmap_uptodate(bh);
527 set_buffer_uptodate(bh);
528 set_buffer_verified(bh);
529 ext4_unlock_group(sb, block_group);
530 unlock_buffer(bh);
cefa74d0 531 return bh;
717d50e4 532 }
955ce5f5 533 ext4_unlock_group(sb, block_group);
2ccb5fb9
AK
534 if (buffer_uptodate(bh)) {
535 /*
536 * if not uninit if bh is uptodate,
537 * bitmap is also uptodate
538 */
539 set_bitmap_uptodate(bh);
540 unlock_buffer(bh);
fa77dcfa 541 goto verify;
2ccb5fb9
AK
542 }
543 /*
813e5727 544 * submit the buffer_head for reading
2ccb5fb9 545 */
813e5727 546 set_buffer_new(bh);
ab74c7b2 547 trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
2d069c08 548 ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
549 (ignore_locked ? REQ_RAHEAD : 0),
550 ext4_end_bitmap_read);
813e5727 551 return bh;
fa77dcfa 552verify:
9008a58e
DW
553 err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
554 if (err)
555 goto out;
556 return bh;
557out:
48d9eb97 558 put_bh(bh);
9008a58e 559 return ERR_PTR(err);
813e5727
TT
560}
561
9033783c 562/* Returns 0 on success, -errno on error */
813e5727
TT
563int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
564 struct buffer_head *bh)
565{
566 struct ext4_group_desc *desc;
567
568 if (!buffer_new(bh))
569 return 0;
570 desc = ext4_get_group_desc(sb, block_group, NULL);
571 if (!desc)
9008a58e 572 return -EFSCORRUPTED;
813e5727 573 wait_on_buffer(bh);
46f870d6 574 ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
813e5727 575 if (!buffer_uptodate(bh)) {
54d3adbc
TT
576 ext4_error_err(sb, EIO, "Cannot read block bitmap - "
577 "block_group = %u, block_bitmap = %llu",
578 block_group, (unsigned long long) bh->b_blocknr);
736dedbb
WS
579 ext4_mark_group_bitmap_corrupted(sb, block_group,
580 EXT4_GROUP_INFO_BBITMAP_CORRUPT);
9008a58e 581 return -EIO;
abcb2947 582 }
813e5727
TT
583 clear_buffer_new(bh);
584 /* Panic or remount fs read-only if block bitmap is invalid */
9008a58e 585 return ext4_validate_block_bitmap(sb, desc, block_group, bh);
813e5727
TT
586}
587
588struct buffer_head *
589ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
590{
591 struct buffer_head *bh;
9008a58e 592 int err;
813e5727 593
cfd73237 594 bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
9008a58e
DW
595 if (IS_ERR(bh))
596 return bh;
597 err = ext4_wait_block_bitmap(sb, block_group, bh);
598 if (err) {
813e5727 599 put_bh(bh);
9008a58e 600 return ERR_PTR(err);
813e5727 601 }
ac27a0ec
DK
602 return bh;
603}
ac27a0ec 604
8c3bf8a0 605/**
df55c99d 606 * ext4_has_free_clusters()
8c3bf8a0 607 * @sbi: in-core super block structure.
df55c99d
TT
608 * @nclusters: number of needed blocks
609 * @flags: flags from ext4_mb_new_blocks()
8c3bf8a0 610 *
df55c99d 611 * Check if filesystem has nclusters free & available for allocation.
8c3bf8a0
ES
612 * On success return 1, return 0 on failure.
613 */
df55c99d
TT
614static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
615 s64 nclusters, unsigned int flags)
a30d542a 616{
27dd4385 617 s64 free_clusters, dirty_clusters, rsv, resv_clusters;
57042651 618 struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
df55c99d 619 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
a30d542a 620
df55c99d
TT
621 free_clusters = percpu_counter_read_positive(fcc);
622 dirty_clusters = percpu_counter_read_positive(dcc);
27dd4385 623 resv_clusters = atomic64_read(&sbi->s_resv_clusters);
304e220f
LC
624
625 /*
626 * r_blocks_count should always be multiple of the cluster ratio so
627 * we are safe to do a plane bit shift only.
628 */
27dd4385
LC
629 rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
630 resv_clusters;
a30d542a 631
27dd4385 632 if (free_clusters - (nclusters + rsv + dirty_clusters) <
df55c99d 633 EXT4_FREECLUSTERS_WATERMARK) {
304e220f 634 free_clusters = percpu_counter_sum_positive(fcc);
df55c99d 635 dirty_clusters = percpu_counter_sum_positive(dcc);
6bc6e63f 636 }
df55c99d
TT
637 /* Check whether we have space after accounting for current
638 * dirty clusters & root reserved clusters.
6bc6e63f 639 */
27dd4385 640 if (free_clusters >= (rsv + nclusters + dirty_clusters))
a996031c 641 return 1;
a30d542a 642
df55c99d 643 /* Hm, nope. Are (enough) root reserved clusters available? */
08cefc7a
EB
644 if (uid_eq(sbi->s_resuid, current_fsuid()) ||
645 (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
55f020db 646 capable(CAP_SYS_RESOURCE) ||
27dd4385 647 (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
55f020db 648
27dd4385
LC
649 if (free_clusters >= (nclusters + dirty_clusters +
650 resv_clusters))
651 return 1;
652 }
653 /* No free blocks. Let's see if we can dip into reserved pool */
654 if (flags & EXT4_MB_USE_RESERVED) {
df55c99d 655 if (free_clusters >= (nclusters + dirty_clusters))
a996031c
ES
656 return 1;
657 }
658
659 return 0;
a30d542a
AK
660}
661
e7d5f315
TT
662int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
663 s64 nclusters, unsigned int flags)
ac27a0ec 664{
df55c99d 665 if (ext4_has_free_clusters(sbi, nclusters, flags)) {
e7d5f315 666 percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
16eb7295 667 return 0;
8c3bf8a0
ES
668 } else
669 return -ENOSPC;
a30d542a 670}
07031431 671
ac27a0ec 672/**
c60990b3 673 * ext4_should_retry_alloc() - check if a block allocation should be retried
efc61345
EW
674 * @sb: superblock
675 * @retries: number of retry attempts made so far
ac27a0ec 676 *
efc61345
EW
677 * ext4_should_retry_alloc() is called when ENOSPC is returned while
678 * attempting to allocate blocks. If there's an indication that a pending
679 * journal transaction might free some space and allow another attempt to
680 * succeed, this function will wait for the current or committing transaction
681 * to complete and then return TRUE.
ac27a0ec 682 */
617ba13b 683int ext4_should_retry_alloc(struct super_block *sb, int *retries)
ac27a0ec 684{
efc61345
EW
685 struct ext4_sb_info *sbi = EXT4_SB(sb);
686
687 if (!sbi->s_journal)
ac27a0ec
DK
688 return 0;
689
efc61345
EW
690 if (++(*retries) > 3) {
691 percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
68fd9750 692 return 0;
efc61345 693 }
68fd9750 694
efc61345
EW
695 /*
696 * if there's no indication that blocks are about to be freed it's
697 * possible we just missed a transaction commit that did so
698 */
699 smp_mb();
5036ab8d
WJ
700 if (sbi->s_mb_free_pending == 0) {
701 if (test_opt(sb, DISCARD)) {
702 atomic_inc(&sbi->s_retry_alloc_pending);
703 flush_work(&sbi->s_discard_work);
704 atomic_dec(&sbi->s_retry_alloc_pending);
705 }
efc61345 706 return ext4_has_free_clusters(sbi, 1, 0);
5036ab8d 707 }
efc61345
EW
708
709 /*
710 * it's possible we've just missed a transaction commit here,
711 * so ignore the returned status
712 */
4978c659 713 ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
efc61345 714 (void) jbd2_journal_force_commit_nested(sbi->s_journal);
dbc427ce 715 return 1;
ac27a0ec
DK
716}
717
654b4908 718/*
d2a17637 719 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
654b4908
AK
720 *
721 * @handle: handle to this transaction
722 * @inode: file inode
723 * @goal: given target block(filesystem wide)
7b415bf6 724 * @count: pointer to total number of clusters needed
654b4908
AK
725 * @errp: error code
726 *
97df5d15 727 * Return 1st allocated block number on success, *count stores total account
d2a17637 728 * error stores in errp pointer
654b4908 729 */
d2a17637 730ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
55f020db
AH
731 ext4_fsblk_t goal, unsigned int flags,
732 unsigned long *count, int *errp)
654b4908 733{
97df5d15 734 struct ext4_allocation_request ar;
d2a17637 735 ext4_fsblk_t ret;
97df5d15
TT
736
737 memset(&ar, 0, sizeof(ar));
738 /* Fill with neighbour allocated blocks */
739 ar.inode = inode;
740 ar.goal = goal;
741 ar.len = count ? *count : 1;
55f020db 742 ar.flags = flags;
97df5d15
TT
743
744 ret = ext4_mb_new_blocks(handle, &ar, errp);
745 if (count)
746 *count = ar.len;
d2a17637 747 /*
72b8ab9d
ES
748 * Account for the allocated meta blocks. We will never
749 * fail EDQUOT for metdata, but we do account for it.
d2a17637 750 */
e3cf5d5d 751 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
7b415bf6
AK
752 dquot_alloc_block_nofail(inode,
753 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
d2a17637
MC
754 }
755 return ret;
654b4908
AK
756}
757
ac27a0ec 758/**
5dee5437 759 * ext4_count_free_clusters() -- count filesystem free clusters
ac27a0ec
DK
760 * @sb: superblock
761 *
5dee5437 762 * Adds up the number of free clusters from each block group.
ac27a0ec 763 */
5dee5437 764ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
ac27a0ec 765{
617ba13b
MC
766 ext4_fsblk_t desc_count;
767 struct ext4_group_desc *gdp;
fd2d4291 768 ext4_group_t i;
8df9675f 769 ext4_group_t ngroups = ext4_get_groups_count(sb);
2746f7a1 770 struct ext4_group_info *grp;
617ba13b
MC
771#ifdef EXT4FS_DEBUG
772 struct ext4_super_block *es;
773 ext4_fsblk_t bitmap_count;
498e5f24 774 unsigned int x;
ac27a0ec
DK
775 struct buffer_head *bitmap_bh = NULL;
776
617ba13b 777 es = EXT4_SB(sb)->s_es;
ac27a0ec
DK
778 desc_count = 0;
779 bitmap_count = 0;
780 gdp = NULL;
781
ac27a0ec 782 for (i = 0; i < ngroups; i++) {
617ba13b 783 gdp = ext4_get_group_desc(sb, i, NULL);
ac27a0ec
DK
784 if (!gdp)
785 continue;
2746f7a1
DW
786 grp = NULL;
787 if (EXT4_SB(sb)->s_group_info)
788 grp = ext4_get_group_info(sb, i);
789 if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
790 desc_count += ext4_free_group_clusters(sb, gdp);
ac27a0ec 791 brelse(bitmap_bh);
574ca174 792 bitmap_bh = ext4_read_block_bitmap(sb, i);
9008a58e
DW
793 if (IS_ERR(bitmap_bh)) {
794 bitmap_bh = NULL;
ac27a0ec 795 continue;
9008a58e 796 }
ac27a0ec 797
f6fb99ca 798 x = ext4_count_free(bitmap_bh->b_data,
036acea2 799 EXT4_CLUSTERS_PER_GROUP(sb) / 8);
9fd9784c 800 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
021b65bb 801 i, ext4_free_group_clusters(sb, gdp), x);
ac27a0ec
DK
802 bitmap_count += x;
803 }
804 brelse(bitmap_bh);
5dee5437
TT
805 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
806 ", computed = %llu, %llu\n",
810da240 807 EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
4776004f 808 desc_count, bitmap_count);
ac27a0ec
DK
809 return bitmap_count;
810#else
811 desc_count = 0;
ac27a0ec 812 for (i = 0; i < ngroups; i++) {
617ba13b 813 gdp = ext4_get_group_desc(sb, i, NULL);
ac27a0ec
DK
814 if (!gdp)
815 continue;
2746f7a1
DW
816 grp = NULL;
817 if (EXT4_SB(sb)->s_group_info)
818 grp = ext4_get_group_info(sb, i);
819 if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
820 desc_count += ext4_free_group_clusters(sb, gdp);
ac27a0ec
DK
821 }
822
823 return desc_count;
824#endif
825}
826
fd2d4291 827static inline int test_root(ext4_group_t a, int b)
ac27a0ec 828{
f4afb4f4
TT
829 while (1) {
830 if (a < b)
831 return 0;
832 if (a == b)
833 return 1;
834 if ((a % b) != 0)
835 return 0;
836 a = a / b;
837 }
ac27a0ec
DK
838}
839
ac27a0ec 840/**
617ba13b 841 * ext4_bg_has_super - number of blocks used by the superblock in group
ac27a0ec
DK
842 * @sb: superblock for filesystem
843 * @group: group number to check
844 *
845 * Return the number of blocks used by the superblock (primary or backup)
846 * in this group. Currently this will be only 0 or 1.
847 */
fd2d4291 848int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
ac27a0ec 849{
1beeef1b
DW
850 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
851
852 if (group == 0)
853 return 1;
e2b911c5 854 if (ext4_has_feature_sparse_super2(sb)) {
1beeef1b
DW
855 if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
856 group == le32_to_cpu(es->s_backup_bgs[1]))
857 return 1;
858 return 0;
859 }
e2b911c5 860 if ((group <= 1) || !ext4_has_feature_sparse_super(sb))
1beeef1b
DW
861 return 1;
862 if (!(group & 1))
ac27a0ec 863 return 0;
1beeef1b
DW
864 if (test_root(group, 3) || (test_root(group, 5)) ||
865 test_root(group, 7))
866 return 1;
867
868 return 0;
ac27a0ec
DK
869}
870
fd2d4291
AM
871static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
872 ext4_group_t group)
ac27a0ec 873{
617ba13b 874 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
fd2d4291
AM
875 ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
876 ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
ac27a0ec
DK
877
878 if (group == first || group == first + 1 || group == last)
879 return 1;
880 return 0;
881}
882
fd2d4291
AM
883static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
884 ext4_group_t group)
ac27a0ec 885{
8dadb198
TT
886 if (!ext4_bg_has_super(sb, group))
887 return 0;
888
ad3f09be 889 return EXT4_SB(sb)->s_gdb_count;
ac27a0ec
DK
890}
891
892/**
617ba13b 893 * ext4_bg_num_gdb - number of blocks used by the group table in group
ac27a0ec
DK
894 * @sb: superblock for filesystem
895 * @group: group number to check
896 *
897 * Return the number of blocks used by the group descriptor table
898 * (primary or backup) in this group. In the future there may be a
899 * different number of descriptor blocks in each group.
900 */
fd2d4291 901unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
ac27a0ec
DK
902{
903 unsigned long first_meta_bg =
617ba13b
MC
904 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
905 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
ac27a0ec 906
e2b911c5 907 if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg)
af5bc92d 908 return ext4_bg_num_gdb_nometa(sb, group);
ac27a0ec 909
617ba13b 910 return ext4_bg_num_gdb_meta(sb,group);
ac27a0ec
DK
911
912}
c2ea3fde 913
49f7f9af 914/*
d5b8f310 915 * This function returns the number of file system metadata clusters at
49f7f9af
TT
916 * the beginning of a block group, including the reserved gdt blocks.
917 */
5f163cc7 918static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
d5b8f310 919 ext4_group_t block_group)
49f7f9af
TT
920{
921 struct ext4_sb_info *sbi = EXT4_SB(sb);
d5b8f310 922 unsigned num;
49f7f9af
TT
923
924 /* Check for superblock and gdt backups in this group */
925 num = ext4_bg_has_super(sb, block_group);
926
e2b911c5 927 if (!ext4_has_feature_meta_bg(sb) ||
49f7f9af
TT
928 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
929 sbi->s_desc_per_block) {
930 if (num) {
a38627f1 931 num += ext4_bg_num_gdb_nometa(sb, block_group);
49f7f9af
TT
932 num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
933 }
934 } else { /* For META_BG_BLOCK_GROUPS */
a38627f1 935 num += ext4_bg_num_gdb_meta(sb, block_group);
49f7f9af 936 }
d5b8f310 937 return EXT4_NUM_B2C(sbi, num);
49f7f9af 938}
f86186b4
ES
939/**
940 * ext4_inode_to_goal_block - return a hint for block allocation
941 * @inode: inode for block allocation
942 *
943 * Return the ideal location to start allocating blocks for a
944 * newly created inode.
945 */
946ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
947{
948 struct ext4_inode_info *ei = EXT4_I(inode);
949 ext4_group_t block_group;
950 ext4_grpblk_t colour;
951 int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
952 ext4_fsblk_t bg_start;
953 ext4_fsblk_t last_block;
954
955 block_group = ei->i_block_group;
956 if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
957 /*
958 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
959 * block groups per flexgroup, reserve the first block
960 * group for directories and special files. Regular
961 * files will start at the second block group. This
962 * tends to speed up directory access and improves
963 * fsck times.
964 */
965 block_group &= ~(flex_size-1);
966 if (S_ISREG(inode->i_mode))
967 block_group++;
968 }
969 bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
970 last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
971
972 /*
973 * If we are doing delayed allocation, we don't need take
974 * colour into account.
975 */
976 if (test_opt(inode->i_sb, DELALLOC))
977 return bg_start;
978
979 if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
9bee5779 980 colour = (task_pid_nr(current) % 16) *
f86186b4
ES
981 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
982 else
9bee5779
RH
983 colour = (task_pid_nr(current) % 16) *
984 ((last_block - bg_start) / 16);
f86186b4
ES
985 return bg_start + colour;
986}
987