buffer: remove folio_create_empty_buffers()
fs/gfs2/bmap.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/iomap.h>
#include <linux/ktime.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "log.h"
#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
#include "aops.h"
#include "trace_gfs2.h"

/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};

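/*
 * Note: mp_fheight is the height that find_metapath() calculated the
 * path for, while mp_aheight records how far lookup_metapath() actually
 * got before hitting an unallocated block.  For a fully allocated path
 * the two are equal; in a sparse region, mp_aheight < mp_fheight.
 */
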
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);

/**
 * gfs2_unstuffer_folio - unstuff a stuffed inode into a block cached by a folio
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @folio: The folio.
 *
 * Returns: errno
 */
static int gfs2_unstuffer_folio(struct gfs2_inode *ip, struct buffer_head *dibh,
				u64 block, struct folio *folio)
{
	struct inode *inode = &ip->i_inode;

	if (!folio_test_uptodate(folio)) {
		void *kaddr = kmap_local_folio(folio, 0);
		u64 dsize = i_size_read(inode);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, folio_size(folio) - dsize);
		kunmap_local(kaddr);

		folio_mark_uptodate(folio);
	}

	if (gfs2_is_jdata(ip)) {
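		/*
		 * In data-journaled mode, the folio's buffers go through the
		 * journal: attach buffer heads if needed and add them to the
		 * current transaction.  In ordered mode (the else branch),
		 * the folio is simply dirtied and the inode is added to the
		 * ordered-write list.
		 */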
		struct buffer_head *bh = folio_buffers(folio);

		if (!bh)
			bh = create_empty_buffers(folio,
				BIT(inode->i_blkbits), BIT(BH_Uptodate));

		if (!buffer_mapped(bh))
			map_bh(bh, inode->i_sb, block);

		set_buffer_uptodate(bh);
		gfs2_trans_add_data(ip->i_gl, bh);
	} else {
		folio_mark_dirty(folio);
		gfs2_ordered_add_inode(ip);
	}

	return 0;
}

static int __gfs2_unstuff_inode(struct gfs2_inode *ip, struct folio *folio)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_folio(ip, dibh, block, folio);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
	return error;
}

/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip)
{
	struct inode *inode = &ip->i_inode;
	struct folio *folio;
	int error;

	down_write(&ip->i_rw_mutex);
	folio = filemap_grab_folio(inode->i_mapping, 0);
	error = PTR_ERR(folio);
	if (IS_ERR(folio))
		goto out;
	error = __gfs2_unstuff_inode(ip, folio);
	folio_unlock(folio);
	folio_put(folio);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}

/**
 * find_metapath - Find path through the metadata tree
 * @sdp: The superblock
 * @block: The disk block to look up
 * @mp: The metapath to return the result in
 * @height: The pre-calculated height of the metadata tree
 *
 * This routine returns a struct metapath structure that defines a path
 * through the metadata of inode "ip" to get to block "block".
 *
 * Example:
 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
 * filesystem with a blocksize of 4096.
 *
 * find_metapath() would return a struct metapath structure set to:
 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 * That means that in order to get to the block containing the byte at
 * offset 101342453, we would load the indirect block pointed to by pointer
 * 0 in the dinode. We would then load the indirect block pointed to by
 * pointer 48 in that indirect block. We would then load the data block
 * pointed to by pointer 165 in that indirect block.
 *
 *             ----------------------------------------
 *             | Dinode |                             |
 *             |        |                            4|
 *             |        |0 1 2 3 4 5                 9|
 *             |        |                            6|
 *             ----------------------------------------
 *                       |
 *                       |
 *                       V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                                     5|
 *             |            4 4 4 4 4 5 5            1|
 *             |           0 5 6 7 8 9 0 1           2|
 *             ----------------------------------------
 *                                |
 *                                |
 *                                V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                 1 1 1 1 1           5|
 *             |                 6 6 6 6 6           1|
 *             |                0 3 4 5 6 7          2|
 *             ----------------------------------------
 *                                         |
 *                                         |
 *                                         V
 *             ----------------------------------------
 *             | Data block containing offset         |
 *             |            101342453                 |
 *             |                                      |
 *             |                                      |
 *             ----------------------------------------
 *
 */

static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
			  struct metapath *mp, unsigned int height)
{
	unsigned int i;

	mp->mp_fheight = height;
	for (i = height; i--;)
		mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
}

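/*
 * When the tree height grows, the old tree top ends up below the first
 * pointer of each newly added root block.  If the target block also
 * lies under dinode pointer 0 (mp_list[0] == 0), the new branch can
 * only diverge from that chain at height 2; otherwise it can start at
 * height 1.  (Explanatory note; see the ALLOC_GROW_HEIGHT logic in
 * __gfs2_iomap_alloc.)
 */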
static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
	if (mp->mp_list[0] == 0)
		return 2;
	return 1;
}

/**
 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 */
static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
{
	struct buffer_head *bh = mp->mp_bh[height];
	if (height == 0)
		return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
	return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
{
	__be64 *p = metaptr1(height, mp);
	return p + mp->mp_list[height];
}

static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}

static void clone_metapath(struct metapath *clone, struct metapath *mp)
{
	unsigned int hgt;

	*clone = *mp;
	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
		get_bh(clone->mp_bh[hgt]);
}

static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META |
					  REQ_PRIO, rabh);
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}

static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		if (!dblock)
			break;
		ret = gfs2_meta_buffer(ip, GFS2_METATYPE_IN, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}

/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}

/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	return mp->mp_aheight - x - 1;
}

static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
{
	sector_t factor = 1, block = 0;
	int hgt;

	for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
		if (hgt < mp->mp_aheight)
			block += mp->mp_list[hgt] * factor;
		factor *= sdp->sd_inptrs;
	}
	return block;
}

static void release_metapath(struct metapath *mp)
{
	int i;

	for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
		if (mp->mp_bh[i] == NULL)
			break;
		brelse(mp->mp_bh[i]);
		mp->mp_bh[i] = NULL;
	}
}

/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return
 * @eob: Set to 1 if we hit "end of block"
 *
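 * Example: if @bh holds pointers to on-disk blocks {100, 101, 102, 50},
 * a walk starting at the first pointer returns an extent length of 3,
 * since the run of consecutive block numbers ends at the fourth pointer.
 *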
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}

enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
			      u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp. Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (hgt) {
			if (mp->mp_list[hgt] >= sdp->sd_inptrs)
				goto lower_metapath;
		} else {
			if (mp->mp_list[hgt] >= sdp->sd_diptrs)
				break;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}

static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;
			return WALK_FOLLOW;
		}
	}
	return WALK_CONTINUE;
}

/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}

static inline void gfs2_indirect_init(struct metapath *mp,
				      struct gfs2_glock *gl, unsigned int i,
				      unsigned offset, u64 bn)
{
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
		       ((i > 1) ? sizeof(struct gfs2_meta_header) :
				  sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
}

enum alloc_state {
	ALLOC_DATA = 0,
	ALLOC_GROW_DEPTH = 1,
	ALLOC_GROW_HEIGHT = 2,
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
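
/*
 * A write into an unallocated region typically moves through these states
 * in descending order: ALLOC_GROW_HEIGHT adds new root blocks,
 * ALLOC_GROW_DEPTH fills in indirect blocks down to the leaves, and
 * ALLOC_DATA finally allocates the data blocks themselves.
 */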

/**
 * __gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 *   i) Indirect blocks to grow the metadata tree height
 *  ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after __gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and use the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int __gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			      struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			fallthrough;	/* To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			fallthrough;	/* To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}

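/*
 * A private iomap flag: the mapping ends at a metadata boundary, so the
 * next block cannot be mapped before another indirect block is read.
 * gfs2_block_map() translates this flag into buffer_boundary().
 */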
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * __gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else __gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}

/**
 * __gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
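		/*
		 * Stuffed files keep their data in the dinode block itself.
		 * Report an IOMAP_INLINE mapping, unless a write would grow
		 * the file past the maximum stuffed size, in which case we
		 * branch to the unstuffing path below.
		 */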
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					iomap->offset = pos;
					iomap->length = length;
					goto hole_found;
				}
			}
			iomap->length = size;
		}
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

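	/*
	 * Work out the tree height needed to address lblock; if it exceeds
	 * the inode's current height, the block lies beyond the allocated
	 * metadata and we jump to do_alloc.
	 */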
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - iomap->offset;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
hole_found:
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	goto out;
}

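/*
 * The ->get_folio and ->put_folio hooks bracket each buffered-write folio
 * with a transaction: gfs2_iomap_get_folio() opens it before the folio is
 * handed out, and gfs2_iomap_put_folio() adds any modified buffers and
 * closes it again.
 */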
static struct folio *
gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
	struct inode *inode = iter->inode;
	unsigned int blockmask = i_blocksize(inode) - 1;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	unsigned int blocks;
	struct folio *folio;
	int status;

	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
	status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
	if (status)
		return ERR_PTR(status);

	folio = iomap_get_folio(iter, pos, len);
	if (IS_ERR(folio))
		gfs2_trans_end(sdp);
	return folio;
}

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
				 unsigned copied, struct folio *folio)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (!gfs2_is_stuffed(ip))
		gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
					copied);

	folio_unlock(folio);
	folio_put(folio);

	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}

static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
	.get_folio = gfs2_iomap_get_folio,
	.put_folio = gfs2_iomap_put_folio,
};

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip);
			if (ret)
				goto out_trans_end;
			release_metapath(mp);
			ret = __gfs2_iomap_get(inode, iomap->offset,
					       iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = __gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->folio_ops = &gfs2_iomap_folio_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}

static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	if (gfs2_is_jdata(ip))
		iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	release_metapath(&mp);
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}

static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t hstart = round_up(pos + written, i_blocksize(inode));
		loff_t hend = iomap->offset + iomap->length;

		if (hstart < hend) {
			truncate_pagecache_range(inode, hstart, hend - 1);
			punch_hole(ip, hstart, hend - hstart);
		}
	}

	if (unlikely(!written))
		return 0;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	return 0;
}

const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};

/**
 * gfs2_block_map - Map one or more blocks of an inode to a disk block
 * @inode: The inode
 * @lblock: The logical block number
 * @bh_map: The bh to be mapped
 * @create: True if it's ok to alloc blocks to satisfy the request
 *
 * The size of the requested mapping is defined in bh_map->b_size.
 *
 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
 * bh_map->b_size to indicate the size of the mapping when @lblock and
 * successive blocks are mapped, up to the requested size.
 *
 * Sets buffer_boundary() if a read of metadata will be required
 * before the next block can be mapped. Sets buffer_new() if new
 * blocks were allocated.
 *
 * Returns: errno
 */

int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (!create)
		ret = gfs2_iomap_get(inode, pos, length, &iomap);
	else
		ret = gfs2_iomap_alloc(inode, pos, length, &iomap);
	if (ret)
		goto out;

	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}

int gfs2_get_extent(struct inode *inode, u64 lblock, u64 *dblock,
		    unsigned int *extlen)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_get(inode, lblock << blkbits, *extlen << blkbits,
			     &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	return 0;
}

int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
		      unsigned int *extlen, bool *new)
{
	unsigned int blkbits = inode->i_blkbits;
	struct iomap iomap = { };
	unsigned int len;
	int ret;

	ret = gfs2_iomap_alloc(inode, lblock << blkbits, *extlen << blkbits,
			       &iomap);
	if (ret)
		return ret;
	if (iomap.type != IOMAP_MAPPED)
		return -EIO;
	*dblock = iomap.addr >> blkbits;
	len = iomap.length >> blkbits;
	if (len < *extlen)
		*extlen = len;
	*new = iomap.flags & IOMAP_F_NEW;
	return 0;
}

/*
 * NOTE: Never call gfs2_block_zero_range with an open transaction because it
 * uses iomap write to perform its actions, which begin their own transactions
 * (iomap_begin, get_folio, etc.)
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	BUG_ON(current->journal_info);
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}

#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

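/*
 * With the default 4 KiB block size, max_chunk below works out to
 * 8192 * 4096 bytes = 32 MiB of page cache truncated per transaction.
 */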
static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}

static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (!gfs2_is_stuffed(ip)) {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				return error;
		}
	}
	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip))
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	else
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}

int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
		   struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, 0, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

int gfs2_iomap_alloc(struct inode *inode, loff_t pos, loff_t length,
		     struct iomap *iomap)
{
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	ret = __gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
	if (!ret && iomap->type == IOMAP_HOLE)
		ret = __gfs2_iomap_alloc(inode, iomap, &mp);
	release_metapath(&mp);
	return ret;
}

/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 LM_FLAG_NODE_SCOPE, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
			buf_in_tr = false;
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}

static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{
	if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
		return false;
	return true;
}

/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: See punch_hole().
 * @end_aligned: See punch_hole().
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}

enum dealloc_states {
	DEALLOC_MP_FULL = 0,    /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1,   /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,    /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,       /* process complete */
};

static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}

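/* Have we run past the last pointer to process at this height? */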
1676static inline bool walk_done(struct gfs2_sbd *sdp,
1677 struct metapath *mp, int height,
1678 __u16 *end_list, unsigned int end_aligned)
1679{
1680 __u16 end;
1681
1682 if (end_list) {
1683 bool keep_end = height < end_aligned;
1684 if (!mp_eq_to_hgt(mp, end_list, height))
1685 return false;
1686 end = end_list[height] + keep_end;
1687 } else
1688 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1689 return mp->mp_list[height] >= end;
5cf26b1e
AG
1690}
1691
d552a2b9 1692/**
10d2cf94 1693 * punch_hole - deallocate blocks in a file
d552a2b9 1694 * @ip: inode to truncate
10d2cf94
AG
1695 * @offset: the start of the hole
1696 * @length: the size of the hole (or 0 for truncate)
1697 *
1698 * Punch a hole into a file or truncate a file at a given position. This
1699 * function operates in whole blocks (@offset and @length are rounded
1700 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1701 *
10d2cf94
AG
1702 * This function works from the bottom up, and from the right to the left. In
1703 * other words, it strips off the highest layer (data) before stripping any of
1704 * the metadata. Doing it this way is best in case the operation is interrupted
1705 * by power failure, etc. The dinode is rewritten in every transaction to
1706 * guarantee integrity.
d552a2b9 1707 */
10d2cf94 1708static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1709{
1710 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1711 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1712 struct metapath mp = {};
d552a2b9
BP
1713 struct buffer_head *dibh, *bh;
1714 struct gfs2_holder rd_gh;
cb7f0903 1715 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1716 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1717 __u16 start_list[GFS2_MAX_META_HEIGHT];
1718 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
3f649ab7 1719 unsigned int start_aligned, end_aligned;
d552a2b9
BP
1720 unsigned int strip_h = ip->i_height - 1;
1721 u32 btotal = 0;
1722 int ret, state;
1723 	int mp_h; /* metapath buffers are read in up to this height */
d552a2b9 1724 u64 prev_bnr = 0;
5cf26b1e 1725 __be64 *start, *end;
b3b94faa 1726
bb491ce6
AG
1727 if (offset >= maxsize) {
1728 /*
e4f82bf2
BP
1729 * The starting point lies beyond the allocated metadata;
1730 * there are no blocks to deallocate.
bb491ce6
AG
1731 */
1732 return 0;
1733 }
1734
10d2cf94
AG
1735 /*
1736 * The start position of the hole is defined by lblock, start_list, and
1737 * start_aligned. The end position of the hole is defined by lend,
1738 * end_list, and end_aligned.
1739 *
1740 * start_aligned and end_aligned define down to which height the start
1741 * and end positions are aligned to the metadata tree (i.e., the
1742 * position is a multiple of the metadata granularity at the height
1743 * above). This determines at which heights additional meta pointers
1744 	 * need to be preserved for the remaining data.
1745 */
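	/*
	 * Worked example (values assumed for illustration): in a height-3
	 * tree, find_metapath() might yield start_list = { 0, 3, 0 }.  The
	 * zero at height 2 means the hole begins exactly on a subtree
	 * boundary at height 1, so start_aligned becomes 1: at heights >= 1
	 * the boundary pointer itself may be swept, while at height 0 it
	 * must be preserved because entries before index 3 in the buffer
	 * below it still hold live data.
	 */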
b3b94faa 1746
10d2cf94 1747 if (length) {
10d2cf94
AG
1748 u64 end_offset = offset + length;
1749 u64 lend;
1750
1751 /*
1752 * Clip the end at the maximum file size for the given height:
1753 * that's how far the metadata goes; files bigger than that
1754 * will have additional layers of indirection.
1755 */
1756 if (end_offset > maxsize)
1757 end_offset = maxsize;
1758 lend = end_offset >> bsize_shift;
1759
1760 if (lblock >= lend)
1761 return 0;
1762
1763 find_metapath(sdp, lend, &mp, ip->i_height);
1764 end_list = __end_list;
1765 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1766
1767 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1768 if (end_list[mp_h])
1769 break;
1770 }
1771 end_aligned = mp_h;
1772 }
1773
1774 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1775 memcpy(start_list, mp.mp_list, sizeof(start_list));
1776
cb7f0903
AG
1777 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1778 if (start_list[mp_h])
1779 break;
1780 }
1781 start_aligned = mp_h;
d552a2b9
BP
1782
1783 ret = gfs2_meta_inode_buffer(ip, &dibh);
1784 if (ret)
1785 return ret;
b3b94faa 1786
d552a2b9
BP
1787 mp.mp_bh[0] = dibh;
1788 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1789 if (ret)
1790 goto out_metapath;
c3ce5aa9
AG
1791
1792 /* issue read-ahead on metadata */
5cf26b1e
AG
1793 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1794 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1795 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1796 gfs2_metapath_ra(ip->i_gl, start, end);
1797 }
c3ce5aa9 1798
e8b43fe0 1799 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1800 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1801 else
1802 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1803
d552a2b9
BP
1804 ret = gfs2_rindex_update(sdp);
1805 if (ret)
1806 goto out_metapath;
1807
1808 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1809 if (ret)
1810 goto out_metapath;
1811 gfs2_holder_mark_uninitialized(&rd_gh);
1812
1813 mp_h = strip_h;
1814
1815 while (state != DEALLOC_DONE) {
1816 switch (state) {
1817 /* Truncate a full metapath at the given strip height.
1818 * Note that strip_h == mp_h in order to be in this state. */
1819 case DEALLOC_MP_FULL:
d552a2b9
BP
1820 bh = mp.mp_bh[mp_h];
1821 gfs2_assert_withdraw(sdp, bh);
1822 if (gfs2_assert_withdraw(sdp,
1823 prev_bnr != bh->b_blocknr)) {
f29e62ee
BP
1824 			fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u, "
1825 "s_h:%u, mp_h:%u\n",
d552a2b9
BP
1826 (unsigned long long)ip->i_no_addr,
1827 prev_bnr, ip->i_height, strip_h, mp_h);
1828 }
1829 prev_bnr = bh->b_blocknr;
cb7f0903 1830
5cf26b1e
AG
1831 if (gfs2_metatype_check(sdp, bh,
1832 (mp_h ? GFS2_METATYPE_IN :
1833 GFS2_METATYPE_DI))) {
1834 ret = -EIO;
1835 goto out;
1836 }
1837
10d2cf94
AG
1838 /*
1839 * Below, passing end_aligned as 0 gives us the
1840 * metapointer range excluding the end point: the end
1841 * point is the first metapath we must not deallocate!
1842 */
1843
5cf26b1e 1844 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1845 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1846 &start, &end);
1847 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1848 start, end,
1849 mp_h != ip->i_height - 1,
1850 &btotal);
cb7f0903 1851
d552a2b9
BP
1852 			/* If we hit an error or have just swept the dinode
1853 			   buffer, exit. */
1854 if (ret || !mp_h) {
1855 state = DEALLOC_DONE;
1856 break;
1857 }
1858 state = DEALLOC_MP_LOWER;
1859 break;
1860
1861 /* lower the metapath strip height */
1862 case DEALLOC_MP_LOWER:
1863 /* We're done with the current buffer, so release it,
1864 unless it's the dinode buffer. Then back up to the
1865 previous pointer. */
1866 if (mp_h) {
1867 brelse(mp.mp_bh[mp_h]);
1868 mp.mp_bh[mp_h] = NULL;
1869 }
1870 /* If we can't get any lower in height, we've stripped
1871 off all we can. Next step is to back up and start
1872 stripping the previous level of metadata. */
1873 if (mp_h == 0) {
1874 strip_h--;
cb7f0903 1875 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1876 mp_h = strip_h;
1877 state = DEALLOC_FILL_MP;
1878 break;
1879 }
1880 mp.mp_list[mp_h] = 0;
1881 mp_h--; /* search one metadata height down */
d552a2b9 1882 mp.mp_list[mp_h]++;
10d2cf94
AG
1883 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1884 break;
d552a2b9
BP
1885 /* Here we've found a part of the metapath that is not
1886 * allocated. We need to search at that height for the
1887 * next non-null pointer. */
10d2cf94 1888 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1889 state = DEALLOC_FILL_MP;
1890 mp_h++;
1891 }
1892 			/* Otherwise, no more non-null pointers at this height;
1893 			   back up to the previous height and try again. */
1894 break; /* loop around in the same state */
1895
1896 /* Fill the metapath with buffers to the given height. */
1897 case DEALLOC_FILL_MP:
1898 /* Fill the buffers out to the current height. */
1899 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1900 if (ret < 0)
d552a2b9 1901 goto out;
c3ce5aa9 1902
e7445ced
AG
1903 /* On the first pass, issue read-ahead on metadata. */
1904 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1905 unsigned int height = mp.mp_aheight - 1;
1906
1907 /* No read-ahead for data blocks. */
1908 if (mp.mp_aheight - 1 == strip_h)
1909 height--;
1910
1911 for (; height >= mp.mp_aheight - ret; height--) {
1912 metapointer_range(&mp, height,
5cf26b1e 1913 start_list, start_aligned,
10d2cf94 1914 end_list, end_aligned,
5cf26b1e
AG
1915 &start, &end);
1916 gfs2_metapath_ra(ip->i_gl, start, end);
1917 }
c3ce5aa9 1918 }
d552a2b9
BP
1919
1920 		/* If buffers were found for the entire strip height */
e8b43fe0 1921 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1922 state = DEALLOC_MP_FULL;
1923 break;
1924 }
e8b43fe0
AG
1925 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1926 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1927
1928 /* If we find a non-null block pointer, crawl a bit
1929 higher up in the metapath and try again, otherwise
1930 we need to look lower for a new starting point. */
10d2cf94 1931 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1932 mp_h++;
1933 else
1934 state = DEALLOC_MP_LOWER;
b3b94faa 1935 break;
d552a2b9 1936 }
b3b94faa
DT
1937 }
1938
d552a2b9
BP
1939 if (btotal) {
1940 if (current->journal_info == NULL) {
1941 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1942 RES_QUOTA, 0);
1943 if (ret)
1944 goto out;
1945 down_write(&ip->i_rw_mutex);
1946 }
1947 gfs2_statfs_change(sdp, 0, +btotal, 0);
1948 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1949 ip->i_inode.i_gid);
8a8b8d91 1950 ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
d552a2b9
BP
1951 gfs2_trans_add_meta(ip->i_gl, dibh);
1952 gfs2_dinode_out(ip, dibh->b_data);
1953 up_write(&ip->i_rw_mutex);
1954 gfs2_trans_end(sdp);
1955 }
b3b94faa 1956
d552a2b9
BP
1957out:
1958 if (gfs2_holder_initialized(&rd_gh))
1959 gfs2_glock_dq_uninit(&rd_gh);
1960 if (current->journal_info) {
1961 up_write(&ip->i_rw_mutex);
1962 gfs2_trans_end(sdp);
1963 cond_resched();
1964 }
1965 gfs2_quota_unhold(ip);
1966out_metapath:
1967 release_metapath(&mp);
1968 return ret;
b3b94faa
DT
1969}
1970
1971static int trunc_end(struct gfs2_inode *ip)
1972{
feaa7bba 1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
1974 struct buffer_head *dibh;
1975 int error;
1976
1977 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1978 if (error)
1979 return error;
1980
1981 down_write(&ip->i_rw_mutex);
1982
1983 error = gfs2_meta_inode_buffer(ip, &dibh);
1984 if (error)
1985 goto out;
1986
a2e0f799 1987 if (!i_size_read(&ip->i_inode)) {
ecc30c79 1988 ip->i_height = 0;
ce276b06 1989 ip->i_goal = ip->i_no_addr;
b3b94faa 1990 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 1991 gfs2_ordered_del_inode(ip);
b3b94faa 1992 }
8a8b8d91 1993 ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
383f01fb 1994 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 1995
350a9b0a 1996 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 1997 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
1998 brelse(dibh);
1999
a91ea69f 2000out:
b3b94faa 2001 up_write(&ip->i_rw_mutex);
b3b94faa 2002 gfs2_trans_end(sdp);
b3b94faa
DT
2003 return error;
2004}
2005
2006/**
2007 * do_shrink - make a file smaller
ff8f33c8 2008 * @inode: the inode
ff8f33c8 2009 * @newsize: the size to make the file
b3b94faa 2010 *
ff8f33c8
SW
2011 * Called with an exclusive lock on @inode. The @newsize must
2012 * be equal to or smaller than the current inode size.
b3b94faa
DT
2013 *
2014 * Returns: errno
2015 */
2016
8b5860a3 2017static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2018{
ff8f33c8 2019 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2020 int error;
2021
8b5860a3 2022 error = trunc_start(inode, newsize);
b3b94faa
DT
2023 if (error < 0)
2024 return error;
ff8f33c8 2025 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2026 return 0;
2027
10d2cf94 2028 error = punch_hole(ip, newsize, 0);
ff8f33c8 2029 if (error == 0)
b3b94faa
DT
2030 error = trunc_end(ip);
2031
2032 return error;
2033}
2034
ff8f33c8
SW
2035/**
2036 * do_grow - Touch and update inode size
2037 * @inode: The inode
2038 * @size: The new size
2039 *
2040 * This function updates the timestamps on the inode and
2041 * may also increase the size of the inode. This function
2042 * must not be called with @size any smaller than the current
2043 * inode size.
2044 *
2045 * Although it is not strictly required to unstuff files here,
2046 * earlier versions of GFS2 had a bug in the stuffed file reading
2047 * code which would result in a buffer overrun if the size was larger
2048 * than the max stuffed file size. In order to prevent this from
25985edc 2049 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
2050 * just update the inode size directly.
2051 *
2052 * Returns: 0 on success, or -ve on error
2053 */
2054
2055static int do_grow(struct inode *inode, u64 size)
2056{
2057 struct gfs2_inode *ip = GFS2_I(inode);
2058 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 2059 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
2060 struct buffer_head *dibh;
2061 int error;
2f7ee358 2062 int unstuff = 0;
a13b8c5f 2063
235628c5 2064 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 2065 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 2066 if (error)
5407e242 2067 return error;
ff8f33c8 2068
7b9cff46 2069 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
2070 if (error)
2071 goto do_grow_qunlock;
2f7ee358 2072 unstuff = 1;
ff8f33c8
SW
2073 }
2074
a01aedfe 2075 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
bc020561
BP
2076 (unstuff &&
2077 gfs2_is_jdata(ip) ? RES_JDATA : 0) +
a01aedfe
BP
2078 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2079 0 : RES_QUOTA), 0);
a13b8c5f 2080 if (error)
ff8f33c8 2081 goto do_grow_release;
a13b8c5f 2082
2f7ee358 2083 if (unstuff) {
7a607a41 2084 error = gfs2_unstuff_dinode(ip);
ff8f33c8
SW
2085 if (error)
2086 goto do_end_trans;
2087 }
a13b8c5f
WC
2088
2089 error = gfs2_meta_inode_buffer(ip, &dibh);
2090 if (error)
ff8f33c8 2091 goto do_end_trans;
a13b8c5f 2092
b473bc2d 2093 truncate_setsize(inode, size);
8a8b8d91 2094 ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
350a9b0a 2095 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
2096 gfs2_dinode_out(ip, dibh->b_data);
2097 brelse(dibh);
2098
ff8f33c8 2099do_end_trans:
a13b8c5f 2100 gfs2_trans_end(sdp);
ff8f33c8 2101do_grow_release:
2f7ee358 2102 if (unstuff) {
ff8f33c8
SW
2103 gfs2_inplace_release(ip);
2104do_grow_qunlock:
2105 gfs2_quota_unlock(ip);
ff8f33c8 2106 }
a13b8c5f
WC
2107 return error;
2108}
2109
b3b94faa 2110/**
ff8f33c8
SW
2111 * gfs2_setattr_size - make a file a given size
2112 * @inode: the inode
2113 * @newsize: the size to make the file
b3b94faa 2114 *
ff8f33c8 2115 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2116 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2117 * in question.
b3b94faa
DT
2118 *
2119 * Returns: errno
2120 */
2121
ff8f33c8 2122int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2123{
af5c2697 2124 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2125 int ret;
b3b94faa 2126
ff8f33c8 2127 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2128
ff8f33c8
SW
2129 ret = inode_newsize_ok(inode, newsize);
2130 if (ret)
2131 return ret;
b3b94faa 2132
562c72aa
CH
2133 inode_dio_wait(inode);
2134
2fba46a0 2135 ret = gfs2_qa_get(ip);
d2b47cfb 2136 if (ret)
2b3dcf35 2137 goto out;
d2b47cfb 2138
8b5860a3 2139 if (newsize >= inode->i_size) {
2b3dcf35
BP
2140 ret = do_grow(inode, newsize);
2141 goto out;
2142 }
ff8f33c8 2143
8b5860a3 2144 ret = do_shrink(inode, newsize);
2b3dcf35 2145out:
7336905a 2146 gfs2_rs_delete(ip);
1595548f 2147 gfs2_qa_put(ip);
2b3dcf35 2148 return ret;
b3b94faa
DT
2149}
2150
2151int gfs2_truncatei_resume(struct gfs2_inode *ip)
2152{
2153 int error;
10d2cf94 2154 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2155 if (!error)
2156 error = trunc_end(ip);
2157 return error;
2158}
2159
2160int gfs2_file_dealloc(struct gfs2_inode *ip)
2161{
10d2cf94 2162 return punch_hole(ip, 0, 0);
b3b94faa
DT
2163}
2164
b50f227b
SW
2165/**
2166 * gfs2_free_journal_extents - Free cached journal bmap info
2167 * @jd: The journal
2168 *
2169 */
2170
2171void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2172{
2173 struct gfs2_journal_extent *jext;
2174
2175 	while (!list_empty(&jd->extent_list)) {
969183bc 2176 jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
b50f227b
SW
2177 list_del(&jext->list);
2178 kfree(jext);
2179 }
2180}
2181
2182/**
2183 * gfs2_add_jextent - Add or merge a new extent to extent cache
2184 * @jd: The journal descriptor
2185 * @lblock: The logical block at start of new extent
c62baf65 2186 * @dblock: The physical block at start of new extent
b50f227b
SW
2187 * @blocks: Size of extent in fs blocks
2188 *
2189 * Returns: 0 on success or -ENOMEM
2190 */
2191
2192static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2193{
2194 struct gfs2_journal_extent *jext;
2195
2196 if (!list_empty(&jd->extent_list)) {
969183bc 2197 jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
b50f227b
SW
2198 if ((jext->dblock + jext->blocks) == dblock) {
2199 jext->blocks += blocks;
2200 return 0;
2201 }
2202 }
2203
2204 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2205 if (jext == NULL)
2206 return -ENOMEM;
2207 jext->dblock = dblock;
2208 jext->lblock = lblock;
2209 jext->blocks = blocks;
2210 list_add_tail(&jext->list, &jd->extent_list);
2211 jd->nr_extents++;
2212 return 0;
2213}
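/*
 * For instance (block numbers assumed): adding (lblock 0, dblock 100,
 * blocks 4) and then (lblock 4, dblock 104, blocks 4) leaves a single
 * 8-block extent, because the second run starts at the physical block
 * where the previous extent ends.
 */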
2214
2215/**
2216 * gfs2_map_journal_extents - Cache journal bmap info
2217 * @sdp: The super block
2218 * @jd: The journal to map
2219 *
2220 * Create a reusable "extent" mapping from all logical
2221 * blocks to all physical blocks for the given journal. This will save
2222 * us time when writing journal blocks. Most journals will have only one
2223 * extent that maps all their logical blocks. That's because mkfs.gfs2
2224 * arranges the journal blocks sequentially to maximize performance.
2225 * So a single extent typically maps the journal's entire length.
2226 * However, gfs2_jadd can happen while the filesystem is active, so
2227 * those journals may not be sequential. Less likely is the case where
2228 * the users created their own journals by mounting the metafs and
2229 * laying it out. But it's still possible. These journals might have
2230 * several extents.
2231 *
2232 * Returns: 0 on success, or error on failure
2233 */
2234
2235int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2236{
2237 u64 lblock = 0;
2238 u64 lblock_stop;
2239 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2240 struct buffer_head bh;
2241 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2242 u64 size;
2243 int rc;
98583b3e 2244 ktime_t start, end;
b50f227b 2245
98583b3e 2246 start = ktime_get();
b50f227b
SW
2247 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2248 size = (lblock_stop - lblock) << shift;
2249 jd->nr_extents = 0;
2250 WARN_ON(!list_empty(&jd->extent_list));
2251
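	/*
	 * Each pass of the loop maps one extent: bh.b_size is primed with
	 * the full remaining size, and gfs2_block_map() shrinks it to the
	 * length of the contiguous run it actually mapped.
	 */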
2252 do {
2253 bh.b_state = 0;
2254 bh.b_blocknr = 0;
2255 bh.b_size = size;
2256 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2257 if (rc || !buffer_mapped(&bh))
2258 goto fail;
2259 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2260 if (rc)
2261 goto fail;
2262 size -= bh.b_size;
2263 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2264 	} while (size > 0);
2265
98583b3e
AD
2266 end = ktime_get();
2267 	fs_info(sdp, "journal %u mapped with %u extents in %lldms\n", jd->jd_jid,
2268 jd->nr_extents, ktime_ms_delta(end, start));
b50f227b
SW
2269 return 0;
2270
2271fail:
2272 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2273 rc, jd->jd_jid,
2274 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2275 jd->nr_extents);
2276 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2277 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2278 bh.b_state, (unsigned long long)bh.b_size);
2279 gfs2_free_journal_extents(jd);
2280 return rc;
2281}
2282
b3b94faa
DT
2283/**
2284 * gfs2_write_alloc_required - figure out if a write will require an allocation
2285 * @ip: the file being written to
2286 * @offset: the offset to write to
2287 * @len: the number of bytes being written
b3b94faa 2288 *
461cb419 2289 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2290 */
2291
cd915493 2292int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2293 unsigned int len)
b3b94faa 2294{
feaa7bba 2295 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2296 struct buffer_head bh;
2297 unsigned int shift;
2298 u64 lblock, lblock_stop, size;
7ed122e4 2299 u64 end_of_file;
b3b94faa 2300
b3b94faa
DT
2301 if (!len)
2302 return 0;
2303
2304 if (gfs2_is_stuffed(ip)) {
235628c5 2305 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2306 return 1;
b3b94faa
DT
2307 return 0;
2308 }
2309
941e6d7d 2310 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2311 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2312 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2313 lblock = offset >> shift;
2314 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
77612578 2315 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
461cb419 2316 return 1;
b3b94faa 2317
941e6d7d
SW
2318 size = (lblock_stop - lblock) << shift;
2319 do {
2320 bh.b_state = 0;
2321 bh.b_size = size;
2322 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2323 if (!buffer_mapped(&bh))
461cb419 2324 return 1;
941e6d7d
SW
2325 size -= bh.b_size;
2326 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2327 	} while (size > 0);
b3b94faa
DT
2328
2329 return 0;
2330}
2331
4e56a641
AG
2332static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2333{
2334 struct gfs2_inode *ip = GFS2_I(inode);
2335 struct buffer_head *dibh;
2336 int error;
2337
2338 if (offset >= inode->i_size)
2339 return 0;
2340 if (offset + length > inode->i_size)
2341 length = inode->i_size - offset;
2342
2343 error = gfs2_meta_inode_buffer(ip, &dibh);
2344 if (error)
2345 return error;
2346 gfs2_trans_add_meta(ip->i_gl, dibh);
2347 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2348 length);
2349 brelse(dibh);
2350 return 0;
2351}
2352
2353static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2354 loff_t length)
2355{
2356 struct gfs2_sbd *sdp = GFS2_SB(inode);
2357 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2358 int error;
2359
2360 while (length) {
2361 struct gfs2_trans *tr;
2362 loff_t chunk;
2363 unsigned int offs;
2364
2365 chunk = length;
2366 if (chunk > max_chunk)
2367 chunk = max_chunk;
2368
2369 offs = offset & ~PAGE_MASK;
2370 if (offs && chunk > PAGE_SIZE)
2371 chunk = offs + ((chunk - offs) & PAGE_MASK);
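		/*
		 * The trimming above keeps later iterations page-aligned.
		 * For example (values assumed): offset 0x1800 with a
		 * 0x10000 chunk gives offs = 0x800 and a trimmed chunk of
		 * 0x800 + 0xf000 = 0xf800, so the next offset, 0x11000,
		 * falls on a page boundary.
		 */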
2372
2373 truncate_pagecache_range(inode, offset, chunk);
2374 offset += chunk;
2375 length -= chunk;
2376
2377 tr = current->journal_info;
2378 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2379 continue;
2380
2381 gfs2_trans_end(sdp);
2382 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2383 if (error)
2384 return error;
2385 }
2386 return 0;
2387}
2388
2389int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2390{
2391 struct inode *inode = file_inode(file);
2392 struct gfs2_inode *ip = GFS2_I(inode);
2393 struct gfs2_sbd *sdp = GFS2_SB(inode);
39c3a948
AG
2394 unsigned int blocksize = i_blocksize(inode);
2395 loff_t start, end;
4e56a641
AG
2396 int error;
2397
70499cdf 2398 if (!gfs2_is_stuffed(ip)) {
39c3a948 2399 unsigned int start_off, end_len;
4e56a641 2400
4e56a641 2401 start_off = offset & (blocksize - 1);
00251a16 2402 end_len = (offset + length) & (blocksize - 1);
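		/*
		 * Example (4096-byte blocks assumed): offset 1000 and
		 * length 10000 give start_off = 1000 and end_len = 2808,
		 * so bytes 1000..4095 of the first block and bytes
		 * 8192..10999 of the last block are zeroed below, while
		 * the whole blocks in between are left to punch_hole().
		 */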
4e56a641
AG
2403 if (start_off) {
2404 unsigned int len = length;
2405 if (length > blocksize - start_off)
2406 len = blocksize - start_off;
2407 error = gfs2_block_zero_range(inode, offset, len);
2408 if (error)
2409 goto out;
2410 if (start_off + length < blocksize)
00251a16 2411 end_len = 0;
4e56a641 2412 }
00251a16 2413 if (end_len) {
4e56a641 2414 error = gfs2_block_zero_range(inode,
00251a16 2415 offset + length - end_len, end_len);
4e56a641
AG
2416 if (error)
2417 goto out;
2418 }
2419 }
2420
70499cdf
BP
2421 start = round_down(offset, blocksize);
2422 end = round_up(offset + length, blocksize) - 1;
2423 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
2424 if (error)
2425 return error;
2426
2427 if (gfs2_is_jdata(ip))
2428 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2429 GFS2_JTRUNC_REVOKES);
2430 else
2431 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2432 if (error)
2433 return error;
2434
2435 if (gfs2_is_stuffed(ip)) {
2436 error = stuffed_zero_range(inode, offset, length);
2437 if (error)
2438 goto out;
2439 }
2440
4e56a641
AG
2441 if (gfs2_is_jdata(ip)) {
2442 BUG_ON(!current->journal_info);
2443 gfs2_journaled_truncate_range(inode, offset, length);
2444 } else
2445 truncate_pagecache_range(inode, offset, offset + length - 1);
2446
2447 file_update_time(file);
2448 mark_inode_dirty(inode);
2449
2450 if (current->journal_info)
2451 gfs2_trans_end(sdp);
2452
2453 if (!gfs2_is_stuffed(ip))
2454 error = punch_hole(ip, offset, length);
2455
2456out:
2457 if (current->journal_info)
2458 gfs2_trans_end(sdp);
2459 return error;
2460}
2164f9b9
CH
2461
2462static int gfs2_map_blocks(struct iomap_writepage_ctx *wpc, struct inode *inode,
2463 loff_t offset)
2464{
2164f9b9
CH
2465 int ret;
2466
2467 if (WARN_ON_ONCE(gfs2_is_stuffed(GFS2_I(inode))))
2468 return -EIO;
2469
2470 if (offset >= wpc->iomap.offset &&
2471 offset < wpc->iomap.offset + wpc->iomap.length)
2472 return 0;
2473
2474 memset(&wpc->iomap, 0, sizeof(wpc->iomap));
54992257 2475 ret = gfs2_iomap_get(inode, offset, INT_MAX, &wpc->iomap);
2164f9b9
CH
2476 return ret;
2477}
2478
2479const struct iomap_writeback_ops gfs2_writeback_ops = {
2480 .map_blocks = gfs2_map_blocks,
2481};