gfs2: Improve mmap write vs. truncate consistency
[linux-2.6-block.git] / fs / gfs2 / bmap.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
b3b94faa
DT
7#include <linux/spinlock.h>
8#include <linux/completion.h>
9#include <linux/buffer_head.h>
64dd153c 10#include <linux/blkdev.h>
5c676f6d 11#include <linux/gfs2_ondisk.h>
71b86f56 12#include <linux/crc32.h>
3974320c 13#include <linux/iomap.h>
98583b3e 14#include <linux/ktime.h>
b3b94faa
DT
15
16#include "gfs2.h"
5c676f6d 17#include "incore.h"
b3b94faa
DT
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
b3b94faa 21#include "meta_io.h"
b3b94faa
DT
22#include "quota.h"
23#include "rgrp.h"
45138990 24#include "log.h"
4c16c36a 25#include "super.h"
b3b94faa 26#include "trans.h"
18ec7d5c 27#include "dir.h"
5c676f6d 28#include "util.h"
64bc06bb 29#include "aops.h"
63997775 30#include "trace_gfs2.h"
b3b94faa
DT
31
/* This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	/* One buffer per tree level; [0] is the dinode buffer. */
	struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
	/* Pointer-slot index within the buffer at each level. */
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
	int mp_fheight; /* find_metapath height */
	int mp_aheight; /* actual height (lookup height) */
};
42
64bc06bb
AG
43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
f25ef0c1
SW
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @page: The (optional) page. This is looked up if @page is NULL
 *
 * Copies the inline (stuffed) data out of the dinode into the page that
 * caches file offset 0, then maps and dirties that page's buffer so the
 * data reaches @block through normal writeback.
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			       u64 block, struct page *page)
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	/* No page supplied, or the supplied page does not cache offset 0:
	   look up / create the right page ourselves. */
	if (!page || page->index) {
		page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);
		u64 dsize = i_size_read(inode);

		/* A stuffed inode holds at most gfs2_max_stuffed_size()
		   bytes of data; clamp in case i_size is larger. */
		if (dsize > gfs2_max_stuffed_size(ip))
			dsize = gfs2_max_stuffed_size(ip);

		/* Inline data follows the on-disk dinode header; copy it
		   in and zero-fill the remainder of the page. */
		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, BIT(inode->i_blkbits),
				     BIT(BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* Journaled data goes through the transaction; otherwise just
	   dirty the buffer and queue the inode for ordered writeback. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else {
		mark_buffer_dirty(bh);
		gfs2_ordered_add_inode(ip);
	}

	/* Only drop the page reference if we took it ourselves above. */
	if (release) {
		unlock_page(page);
		put_page(page);
	}

	return 0;
}
107
b3b94faa
DT
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @page: The (optional) page. This is looked up if the @page is NULL
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* Empty files have no data to move; skip straight to clearing
	   the dinode tail and setting height 1. */
	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directory data is journaled metadata: cancel any
			   pending revoke on the reused block first. */
			gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			/* Regular file data moves out via the page cache. */
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* The first (and only) indirect pointer sits immediately
		   after the on-disk dinode header. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
177
b3b94faa
DT
178
179/**
180 * find_metapath - Find path through the metadata tree
9b8c81d1 181 * @sdp: The superblock
b3b94faa 182 * @block: The disk block to look up
07e23d68 183 * @mp: The metapath to return the result in
9b8c81d1 184 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
185 *
186 * This routine returns a struct metapath structure that defines a path
187 * through the metadata of inode "ip" to get to block "block".
188 *
189 * Example:
190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
191 * filesystem with a blocksize of 4096.
192 *
193 * find_metapath() would return a struct metapath structure set to:
07e23d68 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
195 *
196 * That means that in order to get to the block containing the byte at
197 * offset 101342453, we would load the indirect block pointed to by pointer
198 * 0 in the dinode. We would then load the indirect block pointed to by
199 * pointer 48 in that indirect block. We would then load the data block
200 * pointed to by pointer 165 in that indirect block.
201 *
202 * ----------------------------------------
203 * | Dinode | |
204 * | | 4|
205 * | |0 1 2 3 4 5 9|
206 * | | 6|
207 * ----------------------------------------
208 * |
209 * |
210 * V
211 * ----------------------------------------
212 * | Indirect Block |
213 * | 5|
214 * | 4 4 4 4 4 5 5 1|
215 * |0 5 6 7 8 9 0 1 2|
216 * ----------------------------------------
217 * |
218 * |
219 * V
220 * ----------------------------------------
221 * | Indirect Block |
222 * | 1 1 1 1 1 5|
223 * | 6 6 6 6 6 1|
224 * |0 3 4 5 6 7 2|
225 * ----------------------------------------
226 * |
227 * |
228 * V
229 * ----------------------------------------
230 * | Data block containing offset |
231 * | 101342453 |
232 * | |
233 * | |
234 * ----------------------------------------
235 *
236 */
237
9b8c81d1
SW
238static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
239 struct metapath *mp, unsigned int height)
b3b94faa 240{
b3b94faa
DT
241 unsigned int i;
242
5f8bd444 243 mp->mp_fheight = height;
9b8c81d1 244 for (i = height; i--;)
7eabb77e 245 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
246}
247
5af4e7a0 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 249{
5af4e7a0
BM
250 if (mp->mp_list[0] == 0)
251 return 2;
252 return 1;
9b8c81d1
SW
253}
254
d552a2b9 255/**
20cdc193 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
257 * @height: The metadata height (0 = dinode)
258 * @mp: The metapath
259 */
260static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
261{
262 struct buffer_head *bh = mp->mp_bh[height];
263 if (height == 0)
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
265 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
266}
267
b3b94faa
DT
268/**
269 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
270 * @height: The metadata height (0 = dinode)
271 * @mp: The metapath
272 *
273 * Return a pointer to the block number of the next height of the metadata
274 * tree given a buffer containing the pointer to the current height of the
275 * metadata tree.
276 */
277
9b8c81d1 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 279{
d552a2b9
BP
280 __be64 *p = metaptr1(height, mp);
281 return p + mp->mp_list[height];
b3b94faa
DT
282}
283
7841b9f0
AG
/* Return one past the last pointer slot of the metadata buffer at @height. */
static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
{
	const struct buffer_head *bh = mp->mp_bh[height];
	return (const __be64 *)(bh->b_data + bh->b_size);
}
289
290static void clone_metapath(struct metapath *clone, struct metapath *mp)
291{
292 unsigned int hgt;
293
294 *clone = *mp;
295 for (hgt = 0; hgt < mp->mp_aheight; hgt++)
296 get_bh(clone->mp_bh[hgt]);
297}
298
/* Issue read-ahead for the indirect blocks referenced by the pointers in
 * [start, end), skipping unallocated (zero) slots. Buffers that are
 * already up to date, or locked by someone else, are left alone. */
static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
{
	const __be64 *t;

	for (t = start; t < end; t++) {
		struct buffer_head *rabh;

		if (!*t)
			continue;

		rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
		if (trylock_buffer(rabh)) {
			if (!buffer_uptodate(rabh)) {
				rabh->b_end_io = end_buffer_read_sync;
				submit_bh(REQ_OP_READ,
					  REQ_RAHEAD | REQ_META | REQ_PRIO,
					  rabh);
				/* brelse skipped: end_buffer_read_sync
				   drops the reference on completion. */
				continue;
			}
			unlock_buffer(rabh);
		}
		brelse(rabh);
	}
}
323
e8b43fe0
AG
/* Read the indirect buffers for heights (x, h], stopping early at the
 * first unallocated pointer. mp->mp_aheight is updated to one past the
 * deepest height actually reached. Returns: errno. */
static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
			     unsigned int x, unsigned int h)
{
	for (; x < h; x++) {
		__be64 *ptr = metapointer(x, mp);
		u64 dblock = be64_to_cpu(*ptr);
		int ret;

		/* Unallocated subtree: stop here. */
		if (!dblock)
			break;
		ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
		if (ret)
			return ret;
	}
	mp->mp_aheight = x + 1;
	return 0;
}
341
/**
 * lookup_metapath - Walk the metadata tree to a specific point
 * @ip: The inode
 * @mp: The metapath
 *
 * Assumes that the inode's buffer has already been looked up and
 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 * by find_metapath().
 *
 * If this function encounters part of the tree which has not been
 * allocated, it returns the current height of the tree at the point
 * at which it found the unallocated block. Blocks which are found are
 * added to the mp->mp_bh[] list.
 *
 * Returns: error
 */

static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Walk from the dinode down to the lowest indirect level. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
363
d552a2b9
BP
/**
 * fillup_metapath - fill up buffers for the metadata path to a specific height
 * @ip: The inode
 * @mp: The metapath
 * @h: The height to which it should be mapped
 *
 * Similar to lookup_metapath, but does lookups for a range of heights
 *
 * Returns: error or the number of buffers filled
 */

static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
{
	unsigned int x = 0;
	int ret;

	if (h) {
		/* find the first buffer we need to look up. */
		for (x = h - 1; x > 0; x--) {
			if (mp->mp_bh[x])
				break;
		}
	}
	ret = __fillup_metapath(ip, mp, x, h);
	if (ret)
		return ret;
	/* Number of buffers newly filled in below height x. */
	return mp->mp_aheight - x - 1;
}
392
a27a0c9b
AG
393static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
394{
395 sector_t factor = 1, block = 0;
396 int hgt;
397
398 for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
399 if (hgt < mp->mp_aheight)
400 block += mp->mp_list[hgt] * factor;
401 factor *= sdp->sd_inptrs;
402 }
403 return block;
404}
405
64bc06bb 406static void release_metapath(struct metapath *mp)
dbac6710
SW
407{
408 int i;
409
9b8c81d1
SW
410 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
411 if (mp->mp_bh[i] == NULL)
412 break;
413 brelse(mp->mp_bh[i]);
64bc06bb 414 mp->mp_bh[i] = NULL;
9b8c81d1 415 }
11707ea0
SW
416}
417
30cbf189
SW
/**
 * gfs2_extent_length - Returns length of an extent of blocks
 * @bh: The metadata block
 * @ptr: Current position in @bh
 * @limit: Max extent length to return (currently unused: the scan is
 *         bounded only by the end of @bh; callers cope with a longer
 *         extent than requested)
 * @eob: Set to 1 if we hit "end of block"
 *
 * Counts how many consecutive pointers starting at @ptr refer to
 * physically contiguous disk blocks.
 *
 * Returns: The length of the extent (minimum of one block)
 */

static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
{
	const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
	const __be64 *first = ptr;
	u64 d = be64_to_cpu(*ptr);

	*eob = 0;
	do {
		ptr++;
		if (ptr >= end)
			break;
		d++;	/* expected block number if the extent continues */
	} while(be64_to_cpu(*ptr) == d);
	if (ptr >= end)
		*eob = 1;
	return ptr - first;
}
445
a27a0c9b
AG
enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };

/*
 * gfs2_metadata_walker - walk an indirect block
 * @mp: Metapath to indirect block
 * @ptrs: Number of pointers to look at
 *
 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
 * indirect block to follow.
 */
typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
						   unsigned int ptrs);

/*
 * gfs2_walk_metadata - walk a tree of indirect blocks
 * @inode: The inode
 * @mp: Starting point of walk
 * @max_len: Maximum number of blocks to walk
 * @walker: Called during the walk
 *
 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
 * past the end of metadata, and a negative error code otherwise.
 */

static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
			      u64 max_len, gfs2_metadata_walker walker)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 factor = 1;		/* blocks addressed per pointer at hgt */
	unsigned int hgt;
	int ret;

	/*
	 * The walk starts in the lowest allocated indirect block, which may be
	 * before the position indicated by @mp. Adjust @max_len accordingly
	 * to avoid a short walk.
	 */
	for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
		max_len += mp->mp_list[hgt] * factor;
		mp->mp_list[hgt] = 0;
		factor *= sdp->sd_inptrs;
	}

	for (;;) {
		u16 start = mp->mp_list[hgt];
		enum walker_status status;
		unsigned int ptrs;
		u64 len;

		/* Walk indirect block. */
		ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
		len = ptrs * factor;
		if (len > max_len)
			ptrs = DIV_ROUND_UP_ULL(max_len, factor);
		status = walker(mp, ptrs);
		switch (status) {
		case WALK_STOP:
			return 1;
		case WALK_FOLLOW:
			/* Walker must have descended; re-count only the
			   pointers it actually consumed at this level. */
			BUG_ON(mp->mp_aheight == mp->mp_fheight);
			ptrs = mp->mp_list[hgt] - start;
			len = ptrs * factor;
			break;
		case WALK_CONTINUE:
			break;
		}
		if (len >= max_len)
			break;
		max_len -= len;
		if (status == WALK_FOLLOW)
			goto fill_up_metapath;

lower_metapath:
		/* Decrease height of metapath. */
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		mp->mp_list[hgt] = 0;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
			/* This level is exhausted too; pop another level
			   (or finish if we're already at the dinode). */
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			return ret;
		hgt += ret;
		/* One division per level descended keeps factor in sync. */
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	return 0;
}
549
a27a0c9b
AG
/* Walker for gfs2_hole_size: scan the current level for the first
 * allocated pointer; stop when it is a data block, follow when it is
 * an indirect block, continue when the whole range is holes. */
static enum walker_status gfs2_hole_walker(struct metapath *mp,
					   unsigned int ptrs)
{
	const __be64 *start, *ptr, *end;
	unsigned int hgt;

	hgt = mp->mp_aheight - 1;
	start = metapointer(hgt, mp);
	end = start + ptrs;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr) {
			/* Record where the allocation was found. */
			mp->mp_list[hgt] += ptr - start;
			if (mp->mp_aheight == mp->mp_fheight)
				return WALK_STOP;	/* data block: hole ends */
			return WALK_FOLLOW;	/* descend into subtree */
		}
	}
	return WALK_CONTINUE;
}
570
/**
 * gfs2_hole_size - figure out the size of a hole
 * @inode: The inode
 * @lblock: The logical starting block number
 * @len: How far to look (in blocks)
 * @mp: The metapath at lblock
 * @iomap: The iomap to store the hole size in
 *
 * This function modifies @mp.
 *
 * Returns: errno on error
 */
static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
			  struct metapath *mp, struct iomap *iomap)
{
	struct metapath clone;
	u64 hole_size;
	int ret;

	/* Walk a clone so the caller's metapath survives intact. */
	clone_metapath(&clone, mp);
	ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
	if (ret < 0)
		goto out;

	if (ret == 1)
		/* Walk stopped at an allocated block: hole ends there. */
		hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
	else
		/* No allocation within @len blocks: whole range is a hole. */
		hole_size = len;
	iomap->length = hole_size << inode->i_blkbits;
	ret = 0;

out:
	release_metapath(&clone);
	return ret;
}
606
9b8c81d1
SW
/* Create and initialise a new indirect block at height @i of @mp, store
 * its block number in slot @offset of the parent buffer, and return a
 * pointer to that parent slot. */
static inline __be64 *gfs2_indirect_init(struct metapath *mp,
					 struct gfs2_glock *gl, unsigned int i,
					 unsigned offset, u64 bn)
{
	/* Parent's pointer area starts after its header (dinode for i==1,
	   meta_header for deeper levels). */
	__be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
		       ((i > 1) ? sizeof(struct gfs2_meta_header) :
				  sizeof(struct gfs2_dinode)));
	BUG_ON(i < 1);
	BUG_ON(mp->mp_bh[i] != NULL);
	mp->mp_bh[i] = gfs2_meta_new(gl, bn);
	gfs2_trans_add_meta(gl, mp->mp_bh[i]);
	gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
	ptr += offset;
	*ptr = cpu_to_be64(bn);
	return ptr;
}
624
enum alloc_state {
	ALLOC_DATA = 0,		/* allocating data blocks */
	ALLOC_GROW_DEPTH = 1,	/* allocating indirect blocks downwards */
	ALLOC_GROW_HEIGHT = 2,	/* allocating a taller tree above the root */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};

/**
 * gfs2_iomap_alloc - Build a metadata tree of the requested height
 * @inode: The GFS2 inode
 * @iomap: The iomap structure
 * @mp: The metapath, with proper height information calculated
 *
 * In this routine we may have to alloc:
 * i) Indirect blocks to grow the metadata tree height
 * ii) Indirect blocks to fill in lower part of the metadata tree
 * iii) Data blocks
 *
 * This function is called after gfs2_iomap_get, which works out the
 * total number of blocks which we need via gfs2_alloc_size.
 *
 * We then do the actual allocation asking for an extent at a time (if
 * enough contiguous free blocks are available, there will only be one
 * allocation request per call) and uses the state machine to initialise
 * the blocks in order.
 *
 * Right now, this function will allocate at most one indirect block
 * worth of data -- with a default block size of 4K, that's slightly
 * less than 2M. If this limitation is ever removed to allow huge
 * allocations, we would probably still want to limit the iomap size we
 * return to avoid stalling other tasks during huge writes; the next
 * iomap iteration would then find the blocks already allocated.
 *
 * Returns: errno on error
 */

static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	size_t dblks = iomap->length >> inode->i_blkbits;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);
	BUG_ON(dblks < 1);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		/* Bottom indirect block exists */
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		/* Metadata (and jdata) blocks may have pending revokes from
		   a previous life; cancel them before reuse. */
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_remove_revoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Save the old single indirect pointer; it
				   is re-planted under the new root below. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				/* New root absorbs the dinode's old pointer
				   block; dinode keeps only pointer 0. */
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - To branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - To tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->type = IOMAP_MAPPED;
	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
782
7ee66c03
CH
/* Private iomap flag: extent ends at an indirect-block boundary. */
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE

/**
 * gfs2_alloc_size - Compute the maximum allocation size
 * @inode: The inode
 * @mp: The metapath
 * @size: Requested size in blocks
 *
 * Compute the maximum size of the next allocation at @mp.
 *
 * Returns: size in blocks
 */
static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *first, *ptr, *end;

	/*
	 * For writes to stuffed files, this function is called twice via
	 * gfs2_iomap_get, before and after unstuffing. The size we return the
	 * first time needs to be large enough to get the reservation and
	 * allocation sizes right. The size we return the second time must
	 * be exact or else gfs2_iomap_alloc won't do the right thing.
	 */

	if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
		/* Cap at the slots remaining in the lowest-level block. */
		unsigned int maxsize = mp->mp_fheight > 1 ?
			sdp->sd_inptrs : sdp->sd_diptrs;
		maxsize -= mp->mp_list[mp->mp_fheight - 1];
		if (size > maxsize)
			size = maxsize;
		return size;
	}

	/* Exact case: count unallocated (zero) pointers from here on. */
	first = metapointer(ip->i_height - 1, mp);
	end = metaend(ip->i_height - 1, mp);
	if (end - first > size)
		end = first + size;
	for (ptr = first; ptr < end; ptr++) {
		if (*ptr)
			break;
	}
	return ptr - first;
}
828
/**
 * gfs2_iomap_get - Map blocks from an inode to disk blocks
 * @inode: The inode
 * @pos: Starting position in bytes
 * @length: Length to map, in bytes
 * @flags: iomap flags
 * @iomap: The iomap structure
 * @mp: The metapath
 *
 * Returns: errno
 */
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	loff_t size = i_size_read(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *dibh = NULL, *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		goto unlock;
	/* Hooked onto the metapath; released via release_metapath(). */
	mp->mp_bh[0] = dibh;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_WRITE) {
			loff_t max_size = gfs2_max_stuffed_size(ip);

			/* Writes past the inline capacity must unstuff. */
			if (pos + length > max_size)
				goto unstuff;
			iomap->length = max_size;
		} else {
			if (pos >= size) {
				if (flags & IOMAP_REPORT) {
					ret = -ENOENT;
					goto unlock;
				} else {
					/* report a hole */
					iomap->offset = pos;
					iomap->length = length;
					goto do_alloc;
				}
			}
			iomap->length = size;
		}
		/* Inline data lives right after the on-disk dinode. */
		iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
			      sizeof(struct gfs2_dinode);
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
		goto out;
	}

unstuff:
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;
	iomap->length = len << inode->i_blkbits;

	/* Tree height needed to address lblock (may exceed the current
	   on-disk height for writes past EOF). */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags |= IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	/* Unmapped range: report a hole, or size a future allocation. */
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	if (flags & IOMAP_REPORT) {
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	} else if (flags & IOMAP_WRITE) {
		u64 alloc_size;

		if (flags & IOMAP_DIRECT)
			goto out;	/* (see gfs2_file_direct_write) */

		len = gfs2_alloc_size(inode, mp, len);
		alloc_size = len << inode->i_blkbits;
		if (alloc_size < iomap->length)
			iomap->length = alloc_size;
	} else {
		if (pos < size && height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
	}
	goto out;
}
961
7c70b896
BP
/**
 * gfs2_lblk_to_dblk - convert logical block to disk block
 * @inode: the inode of the file we're mapping
 * @lblock: the block relative to the start of the file
 * @dblock: the returned dblock, if no error
 *
 * This function maps a single block from a file logical block (relative to
 * the start of the file) to a file system absolute block using iomap.
 *
 * Returns: the absolute file system block, or an error
 */
int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock)
{
	struct iomap iomap = { };
	struct metapath mp = { .mp_aheight = 1, };
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	int ret;

	/* Read-only lookup (flags == 0) of exactly one block. */
	ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp);
	release_metapath(&mp);
	if (ret == 0)
		*dblock = iomap.addr >> inode->i_blkbits;

	return ret;
}
987
64bc06bb
AG
/* Acquire the inode glock exclusively for a buffered write. Writes to
 * the rindex inode additionally take the statfs inode glock (NOCACHE).
 * On failure, any holder that was initialised is torn down again. */
static int gfs2_write_lock(struct inode *inode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
	error = gfs2_glock_nq(&ip->i_gh);
	if (error)
		goto out_uninit;
	if (&ip->i_inode == sdp->sd_rindex) {
		struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

		error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &m_ip->i_gh);
		if (error)
			goto out_unlock;
	}
	return 0;

out_unlock:
	gfs2_glock_dq(&ip->i_gh);
out_uninit:
	gfs2_holder_uninit(&ip->i_gh);
	return error;
}
1014
1015static void gfs2_write_unlock(struct inode *inode)
1016{
1017 struct gfs2_inode *ip = GFS2_I(inode);
1018 struct gfs2_sbd *sdp = GFS2_SB(inode);
1019
1020 if (&ip->i_inode == sdp->sd_rindex) {
1021 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
1022
1023 gfs2_glock_dq_uninit(&m_ip->i_gh);
1024 }
1025 gfs2_glock_dq_uninit(&ip->i_gh);
1026}
1027
d0a22a4b
AG
1028static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
1029 unsigned len, struct iomap *iomap)
1030{
2741b672 1031 unsigned int blockmask = i_blocksize(inode) - 1;
d0a22a4b 1032 struct gfs2_sbd *sdp = GFS2_SB(inode);
2741b672 1033 unsigned int blocks;
d0a22a4b 1034
2741b672
AG
1035 blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
1036 return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
d0a22a4b
AG
1037}
1038
df0db3ec
AG
/*
 * iomap page_done hook: called after @copied bytes have been written into
 * @page.  Adds the data buffers to the journal for unstuffed inodes and
 * ends the transaction opened in gfs2_iomap_page_prepare().
 */
static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
				 unsigned copied, struct page *page,
				 struct iomap *iomap)
{
	/* Transaction opened by gfs2_iomap_page_prepare(). */
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (page && !gfs2_is_stuffed(ip))
		gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);

	/* New buffers went into the transaction: flag the inode dirty. */
	if (tr->tr_num_buf_new)
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

	gfs2_trans_end(sdp);
}
1055
df0db3ec 1056static const struct iomap_page_ops gfs2_iomap_page_ops = {
d0a22a4b 1057 .page_prepare = gfs2_iomap_page_prepare,
df0db3ec
AG
1058 .page_done = gfs2_iomap_page_done,
1059};
1060
64bc06bb
AG
/*
 * gfs2_iomap_begin_write - prepare a write mapping, allocating as needed
 * @inode: The inode
 * @pos: Starting byte position of the write
 * @length: Length of the write in bytes
 * @flags: iomap flags
 * @iomap: The iomap, already filled in by gfs2_iomap_get()
 * @mp: The metapath from the preceding gfs2_iomap_get() call
 *
 * Reserves quota and resource-group space, opens a transaction, unstuffs
 * the inode when the write won't fit in the dinode block, and allocates
 * blocks when the mapped extent is a hole.  On success, the per-page
 * hooks are installed for stuffed and journaled-data inodes.
 *
 * Returns: 0 on success or a negative error code
 */
static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
				  loff_t length, unsigned flags,
				  struct iomap *iomap,
				  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	bool unstuff;
	int ret;

	unstuff = gfs2_is_stuffed(ip) &&
		  pos + length > gfs2_max_stuffed_size(ip);

	if (unstuff || iomap->type == IOMAP_HOLE) {
		unsigned int data_blocks, ind_blocks;
		struct gfs2_alloc_parms ap = {};
		unsigned int rblocks;
		struct gfs2_trans *tr;

		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
				       &ind_blocks);
		ap.target = data_blocks + ind_blocks;
		ret = gfs2_quota_lock_check(ip, &ap);
		if (ret)
			return ret;

		ret = gfs2_inplace_reserve(ip, &ap);
		if (ret)
			goto out_qunlock;

		/* Journal space: dinode + indirect blocks, data blocks for
		   jdata, statfs/quota updates, and rgrp bitmap blocks. */
		rblocks = RES_DINODE + ind_blocks;
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks;
		if (ind_blocks || data_blocks)
			rblocks += RES_STATFS + RES_QUOTA;
		if (inode == sdp->sd_rindex)
			rblocks += 2 * RES_STATFS;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);

		ret = gfs2_trans_begin(sdp, rblocks,
				       iomap->length >> inode->i_blkbits);
		if (ret)
			goto out_trans_fail;

		if (unstuff) {
			ret = gfs2_unstuff_dinode(ip, NULL);
			if (ret)
				goto out_trans_end;
			/* The mapping has changed: look the extent up again. */
			release_metapath(mp);
			ret = gfs2_iomap_get(inode, iomap->offset,
					     iomap->length, flags, iomap, mp);
			if (ret)
				goto out_trans_end;
		}

		if (iomap->type == IOMAP_HOLE) {
			ret = gfs2_iomap_alloc(inode, iomap, mp);
			if (ret) {
				/* Undo any partially completed allocation. */
				gfs2_trans_end(sdp);
				gfs2_inplace_release(ip);
				punch_hole(ip, iomap->offset, iomap->length);
				goto out_qunlock;
			}
		}

		tr = current->journal_info;
		if (tr->tr_num_buf_new)
			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);

		gfs2_trans_end(sdp);
	}

	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
		iomap->page_ops = &gfs2_iomap_page_ops;
	return 0;

out_trans_end:
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
	return ret;
}
1145
34aad20b
AG
1146static inline bool gfs2_iomap_need_write_lock(unsigned flags)
1147{
1148 return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT);
1149}
1150
628e366d
AG
/*
 * gfs2_iomap_begin - iomap_begin operation for gfs2
 *
 * Maps [pos, pos + length).  For buffered writes and for zeroing of
 * allocated ranges, allocation is carried out via
 * gfs2_iomap_begin_write().  Buffered (non-direct) writes take the inode
 * glock here; on success it is held until gfs2_iomap_end() drops it.
 */
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct metapath mp = { .mp_aheight = 1, };
	int ret;

	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	trace_gfs2_iomap_start(ip, pos, length, flags);
	if (gfs2_iomap_need_write_lock(flags)) {
		ret = gfs2_write_lock(inode);
		if (ret)
			goto out;
	}

	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
	if (ret)
		goto out_unlock;

	switch(flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT) {
			/*
			 * Silently fall back to buffered I/O for stuffed files
			 * or if we've got a hole (see gfs2_file_direct_write).
			 */
			if (iomap->type != IOMAP_MAPPED)
				ret = -ENOTBLK;
			goto out_unlock;
		}
		break;
	case IOMAP_ZERO:
		/* Zeroing a hole is a no-op. */
		if (iomap->type == IOMAP_HOLE)
			goto out_unlock;
		break;
	default:
		goto out_unlock;
	}

	ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);

out_unlock:
	/* On success the write lock is kept; gfs2_iomap_end() releases it. */
	if (ret && gfs2_iomap_need_write_lock(flags))
		gfs2_write_unlock(inode);
	release_metapath(&mp);
out:
	trace_gfs2_iomap_end(ip, iomap, ret);
	return ret;
}
1201
64bc06bb
AG
/*
 * gfs2_iomap_end - finish a mapping set up by gfs2_iomap_begin
 *
 * For the write paths handled by gfs2_iomap_begin_write(): releases the
 * block reservation and quota lock, punches out blocks that were newly
 * allocated but not covered by a (short) write, marks the inode dirty
 * when its size changed, and drops the inode glock taken in
 * gfs2_iomap_begin().
 */
static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
			  ssize_t written, unsigned flags, struct iomap *iomap)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	/* Mirror the case selection in gfs2_iomap_begin(): only mappings
	   that went through gfs2_iomap_begin_write() need cleanup. */
	switch (flags & (IOMAP_WRITE | IOMAP_ZERO)) {
	case IOMAP_WRITE:
		if (flags & IOMAP_DIRECT)
			return 0;
		break;
	case IOMAP_ZERO:
		if (iomap->type == IOMAP_HOLE)
			return 0;
		break;
	default:
		return 0;
	}

	if (!gfs2_is_stuffed(ip))
		gfs2_ordered_add_inode(ip);

	if (inode == sdp->sd_rindex)
		adjust_fs_space(inode);

	gfs2_inplace_release(ip);

	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
		/* Deallocate blocks that were just allocated. */
		loff_t blockmask = i_blocksize(inode) - 1;
		loff_t end = (pos + length) & ~blockmask;

		pos = (pos + written + blockmask) & ~blockmask;
		if (pos < end) {
			truncate_pagecache_range(inode, pos, end - 1);
			punch_hole(ip, pos, end - pos);
		}
	}

	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
		gfs2_quota_unlock(ip);

	if (unlikely(!written))
		goto out_unlock;

	if (iomap->flags & IOMAP_F_SIZE_CHANGED)
		mark_inode_dirty(inode);
	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);

out_unlock:
	if (gfs2_iomap_need_write_lock(flags))
		gfs2_write_unlock(inode);
	return 0;
}
1256
628e366d
AG
/* iomap callbacks for gfs2 (see gfs2_iomap_begin / gfs2_iomap_end above). */
const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
	.iomap_end = gfs2_iomap_end,
};
1261
3974320c 1262/**
d39d18e0 1263 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
1264 * @inode: The inode
1265 * @lblock: The logical block number
1266 * @bh_map: The bh to be mapped
1267 * @create: True if its ok to alloc blocks to satify the request
1268 *
d39d18e0
AG
1269 * The size of the requested mapping is defined in bh_map->b_size.
1270 *
1271 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1272 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
1273 * bh_map->b_size to indicate the size of the mapping when @lblock and
1274 * successive blocks are mapped, up to the requested size.
1275 *
1276 * Sets buffer_boundary() if a read of metadata will be required
1277 * before the next block can be mapped. Sets buffer_new() if new
1278 * blocks were allocated.
3974320c
BP
1279 *
1280 * Returns: errno
1281 */
1282
1283int gfs2_block_map(struct inode *inode, sector_t lblock,
1284 struct buffer_head *bh_map, int create)
1285{
1286 struct gfs2_inode *ip = GFS2_I(inode);
628e366d
AG
1287 loff_t pos = (loff_t)lblock << inode->i_blkbits;
1288 loff_t length = bh_map->b_size;
1289 struct metapath mp = { .mp_aheight = 1, };
1290 struct iomap iomap = { };
1291 int ret;
3974320c
BP
1292
1293 clear_buffer_mapped(bh_map);
1294 clear_buffer_new(bh_map);
1295 clear_buffer_boundary(bh_map);
1296 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
1297
628e366d
AG
1298 if (create) {
1299 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
1300 if (!ret && iomap.type == IOMAP_HOLE)
bb4cb25d 1301 ret = gfs2_iomap_alloc(inode, &iomap, &mp);
628e366d
AG
1302 release_metapath(&mp);
1303 } else {
1304 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
1305 release_metapath(&mp);
3974320c 1306 }
628e366d
AG
1307 if (ret)
1308 goto out;
3974320c
BP
1309
1310 if (iomap.length > bh_map->b_size) {
1311 iomap.length = bh_map->b_size;
7ee66c03 1312 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
5f8bd444 1313 }
3974320c
BP
1314 if (iomap.addr != IOMAP_NULL_ADDR)
1315 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
1316 bh_map->b_size = iomap.length;
7ee66c03 1317 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
3974320c
BP
1318 set_buffer_boundary(bh_map);
1319 if (iomap.flags & IOMAP_F_NEW)
1320 set_buffer_new(bh_map);
1321
1322out:
1323 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
1324 return ret;
fd88de56
SW
1325}
1326
941e6d7d
SW
1327/*
1328 * Deprecated: do not use in new code
1329 */
fd88de56
SW
1330int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
1331{
23591256 1332 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 1333 int ret;
fd88de56
SW
1334 int create = *new;
1335
1336 BUG_ON(!extlen);
1337 BUG_ON(!dblock);
1338 BUG_ON(!new);
1339
47a9a527 1340 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 1341 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
1342 *extlen = bh.b_size >> inode->i_blkbits;
1343 *dblock = bh.b_blocknr;
1344 if (buffer_new(&bh))
1345 *new = 1;
1346 else
1347 *new = 0;
1348 return ret;
b3b94faa
DT
1349}
1350
bdba0d5e
AG
/*
 * Zero out the byte range [from, from + length) in the page cache /
 * on disk via iomap_zero_range(), using gfs2's iomap operations.
 */
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
}
1356
c62baf65
FF
1357#define GFS2_JTRUNC_REVOKES 8192
1358
fa731fc4
SW
/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	/* Largest chunk we truncate per transaction, in bytes. */
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* After an unaligned first step, keep chunks page aligned. */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		/* Only restart the transaction if it was actually used. */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
1404
/*
 * trunc_start - begin shrinking a file down to @newsize
 * @inode: The inode being truncated
 * @newsize: The new (smaller) file size
 *
 * Updates the on-disk size and mtime/ctime, zeroes the partial block at
 * the new end of file (or clears the dinode tail for stuffed inodes),
 * sets GFS2_DIF_TRUNC_IN_PROG for unstuffed inodes, and truncates the
 * page cache.
 */
static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	/* jdata needs revoke space for the journaled pagecache truncation. */
	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			/* Zero the tail of the new last block. */
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}
1456
628e366d
AG
1457int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1458 struct iomap *iomap)
1459{
1460 struct metapath mp = { .mp_aheight = 1, };
1461 int ret;
1462
1463 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1464 if (!ret && iomap->type == IOMAP_HOLE)
bb4cb25d 1465 ret = gfs2_iomap_alloc(inode, iomap, &mp);
628e366d
AG
1466 release_metapath(&mp);
1467 return ret;
1468}
1469
d552a2b9
BP
/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	/* Reuse the rgrp glock from a previous pass when still held. */
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			if (!rgrp_contains_block(rgd, bn)) {
				/* Belongs to another rgrp: next pass. */
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		/* Coalesce physically contiguous blocks into one extent. */
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	/* Free the final accumulated extent, if any. */
	if (bstart) {
		__gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}
1643
10d2cf94
AG
1644static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1645{
1646 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1647 return false;
1648 return true;
1649}
1650
d552a2b9
BP
/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: The superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: metapath of the end of the range being deallocated (or NULL)
 * @end_aligned: height to which the end position is tree-aligned
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	/* Don't scan past the end of the range being deallocated. */
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			/* Point the metapath at the hit, reset level below. */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}
1687
/* States of the bottom-up deallocation state machine in punch_hole(). */
enum dealloc_states {
	DEALLOC_MP_FULL = 0,  /* Strip a metapath with all buffers read in */
	DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
	DEALLOC_FILL_MP = 2,  /* Fill in the metapath to the given height. */
	DEALLOC_DONE = 3,     /* process complete */
};
b3b94faa 1694
5cf26b1e
AG
/*
 * metapointer_range - compute the pointer range to process in a buffer
 * @mp: The metapath
 * @height: The height within the metadata tree
 * @start_list: metapath of the start of the range being deallocated
 * @start_aligned: height to which the start position is tree-aligned
 * @end_list: metapath of the end of the range (or NULL)
 * @end_aligned: height to which the end position is tree-aligned
 * @start: returned first pointer to process (inclusive)
 * @end: returned end of the pointers to process (exclusive)
 *
 * Clamps the pointer range of buffer mp->mp_bh[@height] so that only
 * pointers within the range being deallocated are visited.
 */
static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}
1716
/*
 * walk_done - check if the metapath walk has reached its end at @height
 * @sdp: The superblock
 * @mp: The current metapath
 * @height: The height being examined
 * @end_list: metapath of the end of the range (NULL for truncate-to-end)
 * @end_aligned: height to which the end position is tree-aligned
 *
 * Returns: true if mp_list[@height] is at or past the last pointer to
 *          visit (the end of the range, or the end of the buffer).
 */
static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		/* No end point: walk to the end of the whole buffer. */
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}
1732
d552a2b9 1733/**
10d2cf94 1734 * punch_hole - deallocate blocks in a file
d552a2b9 1735 * @ip: inode to truncate
10d2cf94
AG
1736 * @offset: the start of the hole
1737 * @length: the size of the hole (or 0 for truncate)
1738 *
1739 * Punch a hole into a file or truncate a file at a given position. This
1740 * function operates in whole blocks (@offset and @length are rounded
1741 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1742 *
10d2cf94
AG
1743 * This function works from the bottom up, and from the right to the left. In
1744 * other words, it strips off the highest layer (data) before stripping any of
1745 * the metadata. Doing it this way is best in case the operation is interrupted
1746 * by power failure, etc. The dinode is rewritten in every transaction to
1747 * guarantee integrity.
d552a2b9 1748 */
10d2cf94 1749static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1750{
1751 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1752 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1753 struct metapath mp = {};
d552a2b9
BP
1754 struct buffer_head *dibh, *bh;
1755 struct gfs2_holder rd_gh;
cb7f0903 1756 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1757 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1758 __u16 start_list[GFS2_MAX_META_HEIGHT];
1759 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1760 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1761 unsigned int strip_h = ip->i_height - 1;
1762 u32 btotal = 0;
1763 int ret, state;
1764 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1765 u64 prev_bnr = 0;
5cf26b1e 1766 __be64 *start, *end;
b3b94faa 1767
bb491ce6
AG
1768 if (offset >= maxsize) {
1769 /*
1770 * The starting point lies beyond the allocated meta-data;
1771 * there are no blocks do deallocate.
1772 */
1773 return 0;
1774 }
1775
10d2cf94
AG
1776 /*
1777 * The start position of the hole is defined by lblock, start_list, and
1778 * start_aligned. The end position of the hole is defined by lend,
1779 * end_list, and end_aligned.
1780 *
1781 * start_aligned and end_aligned define down to which height the start
1782 * and end positions are aligned to the metadata tree (i.e., the
1783 * position is a multiple of the metadata granularity at the height
1784 * above). This determines at which heights additional meta pointers
1785 * needs to be preserved for the remaining data.
1786 */
b3b94faa 1787
10d2cf94 1788 if (length) {
10d2cf94
AG
1789 u64 end_offset = offset + length;
1790 u64 lend;
1791
1792 /*
1793 * Clip the end at the maximum file size for the given height:
1794 * that's how far the metadata goes; files bigger than that
1795 * will have additional layers of indirection.
1796 */
1797 if (end_offset > maxsize)
1798 end_offset = maxsize;
1799 lend = end_offset >> bsize_shift;
1800
1801 if (lblock >= lend)
1802 return 0;
1803
1804 find_metapath(sdp, lend, &mp, ip->i_height);
1805 end_list = __end_list;
1806 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1807
1808 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1809 if (end_list[mp_h])
1810 break;
1811 }
1812 end_aligned = mp_h;
1813 }
1814
1815 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1816 memcpy(start_list, mp.mp_list, sizeof(start_list));
1817
cb7f0903
AG
1818 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1819 if (start_list[mp_h])
1820 break;
1821 }
1822 start_aligned = mp_h;
d552a2b9
BP
1823
1824 ret = gfs2_meta_inode_buffer(ip, &dibh);
1825 if (ret)
1826 return ret;
b3b94faa 1827
d552a2b9
BP
1828 mp.mp_bh[0] = dibh;
1829 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1830 if (ret)
1831 goto out_metapath;
c3ce5aa9
AG
1832
1833 /* issue read-ahead on metadata */
5cf26b1e
AG
1834 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1835 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1836 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1837 gfs2_metapath_ra(ip->i_gl, start, end);
1838 }
c3ce5aa9 1839
e8b43fe0 1840 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1841 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1842 else
1843 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1844
d552a2b9
BP
1845 ret = gfs2_rindex_update(sdp);
1846 if (ret)
1847 goto out_metapath;
1848
1849 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1850 if (ret)
1851 goto out_metapath;
1852 gfs2_holder_mark_uninitialized(&rd_gh);
1853
1854 mp_h = strip_h;
1855
1856 while (state != DEALLOC_DONE) {
1857 switch (state) {
1858 /* Truncate a full metapath at the given strip height.
1859 * Note that strip_h == mp_h in order to be in this state. */
1860 case DEALLOC_MP_FULL:
d552a2b9
BP
1861 bh = mp.mp_bh[mp_h];
1862 gfs2_assert_withdraw(sdp, bh);
1863 if (gfs2_assert_withdraw(sdp,
1864 prev_bnr != bh->b_blocknr)) {
f29e62ee
BP
1865 fs_emerg(sdp, "inode %llu, block:%llu, i_h:%u,"
1866 "s_h:%u, mp_h:%u\n",
d552a2b9
BP
1867 (unsigned long long)ip->i_no_addr,
1868 prev_bnr, ip->i_height, strip_h, mp_h);
1869 }
1870 prev_bnr = bh->b_blocknr;
cb7f0903 1871
5cf26b1e
AG
1872 if (gfs2_metatype_check(sdp, bh,
1873 (mp_h ? GFS2_METATYPE_IN :
1874 GFS2_METATYPE_DI))) {
1875 ret = -EIO;
1876 goto out;
1877 }
1878
10d2cf94
AG
1879 /*
1880 * Below, passing end_aligned as 0 gives us the
1881 * metapointer range excluding the end point: the end
1882 * point is the first metapath we must not deallocate!
1883 */
1884
5cf26b1e 1885 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1886 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1887 &start, &end);
1888 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1889 start, end,
1890 mp_h != ip->i_height - 1,
1891 &btotal);
cb7f0903 1892
d552a2b9
BP
1893 /* If we hit an error or just swept dinode buffer,
1894 just exit. */
1895 if (ret || !mp_h) {
1896 state = DEALLOC_DONE;
1897 break;
1898 }
1899 state = DEALLOC_MP_LOWER;
1900 break;
1901
1902 /* lower the metapath strip height */
1903 case DEALLOC_MP_LOWER:
1904 /* We're done with the current buffer, so release it,
1905 unless it's the dinode buffer. Then back up to the
1906 previous pointer. */
1907 if (mp_h) {
1908 brelse(mp.mp_bh[mp_h]);
1909 mp.mp_bh[mp_h] = NULL;
1910 }
1911 /* If we can't get any lower in height, we've stripped
1912 off all we can. Next step is to back up and start
1913 stripping the previous level of metadata. */
1914 if (mp_h == 0) {
1915 strip_h--;
cb7f0903 1916 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1917 mp_h = strip_h;
1918 state = DEALLOC_FILL_MP;
1919 break;
1920 }
1921 mp.mp_list[mp_h] = 0;
1922 mp_h--; /* search one metadata height down */
d552a2b9 1923 mp.mp_list[mp_h]++;
10d2cf94
AG
1924 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1925 break;
d552a2b9
BP
1926 /* Here we've found a part of the metapath that is not
1927 * allocated. We need to search at that height for the
1928 * next non-null pointer. */
10d2cf94 1929 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1930 state = DEALLOC_FILL_MP;
1931 mp_h++;
1932 }
1933 /* No more non-null pointers at this height. Back up
1934 to the previous height and try again. */
1935 break; /* loop around in the same state */
1936
1937 /* Fill the metapath with buffers to the given height. */
1938 case DEALLOC_FILL_MP:
1939 /* Fill the buffers out to the current height. */
1940 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1941 if (ret < 0)
d552a2b9 1942 goto out;
c3ce5aa9 1943
e7445ced
AG
1944 /* On the first pass, issue read-ahead on metadata. */
1945 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1946 unsigned int height = mp.mp_aheight - 1;
1947
1948 /* No read-ahead for data blocks. */
1949 if (mp.mp_aheight - 1 == strip_h)
1950 height--;
1951
1952 for (; height >= mp.mp_aheight - ret; height--) {
1953 metapointer_range(&mp, height,
5cf26b1e 1954 start_list, start_aligned,
10d2cf94 1955 end_list, end_aligned,
5cf26b1e
AG
1956 &start, &end);
1957 gfs2_metapath_ra(ip->i_gl, start, end);
1958 }
c3ce5aa9 1959 }
d552a2b9
BP
1960
1961 /* If buffers found for the entire strip height */
e8b43fe0 1962 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1963 state = DEALLOC_MP_FULL;
1964 break;
1965 }
e8b43fe0
AG
1966 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1967 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1968
1969 /* If we find a non-null block pointer, crawl a bit
1970 higher up in the metapath and try again, otherwise
1971 we need to look lower for a new starting point. */
10d2cf94 1972 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1973 mp_h++;
1974 else
1975 state = DEALLOC_MP_LOWER;
b3b94faa 1976 break;
d552a2b9 1977 }
b3b94faa
DT
1978 }
1979
d552a2b9
BP
1980 if (btotal) {
1981 if (current->journal_info == NULL) {
1982 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1983 RES_QUOTA, 0);
1984 if (ret)
1985 goto out;
1986 down_write(&ip->i_rw_mutex);
1987 }
1988 gfs2_statfs_change(sdp, 0, +btotal, 0);
1989 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1990 ip->i_inode.i_gid);
b32c8c76 1991 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1992 gfs2_trans_add_meta(ip->i_gl, dibh);
1993 gfs2_dinode_out(ip, dibh->b_data);
1994 up_write(&ip->i_rw_mutex);
1995 gfs2_trans_end(sdp);
1996 }
b3b94faa 1997
d552a2b9
BP
1998out:
1999 if (gfs2_holder_initialized(&rd_gh))
2000 gfs2_glock_dq_uninit(&rd_gh);
2001 if (current->journal_info) {
2002 up_write(&ip->i_rw_mutex);
2003 gfs2_trans_end(sdp);
2004 cond_resched();
2005 }
2006 gfs2_quota_unhold(ip);
2007out_metapath:
2008 release_metapath(&mp);
2009 return ret;
b3b94faa
DT
2010}
2011
/**
 * trunc_end - finish off a truncate by updating the on-disk dinode
 * @ip: the inode being truncated
 *
 * Writes the final inode state back to the dinode under a transaction
 * and the inode's i_rw_mutex.  If the file shrank to zero, the metadata
 * tree is collapsed and the allocation goal reset.  Always clears the
 * GFS2_DIF_TRUNC_IN_PROG flag, which marks a truncate that must be
 * resumed after a crash (see gfs2_truncatei_resume).
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* File is now empty: drop the indirect tree, restart block
		   allocation from the dinode's own address, and wipe any
		   stale pointers in the dinode tail. */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	/* Truncate is complete; clear the crash-recovery marker. */
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
2046
2047/**
2048 * do_shrink - make a file smaller
ff8f33c8 2049 * @inode: the inode
ff8f33c8 2050 * @newsize: the size to make the file
b3b94faa 2051 *
ff8f33c8
SW
2052 * Called with an exclusive lock on @inode. The @size must
2053 * be equal to or smaller than the current inode size.
b3b94faa
DT
2054 *
2055 * Returns: errno
2056 */
2057
8b5860a3 2058static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2059{
ff8f33c8 2060 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2061 int error;
2062
8b5860a3 2063 error = trunc_start(inode, newsize);
b3b94faa
DT
2064 if (error < 0)
2065 return error;
ff8f33c8 2066 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2067 return 0;
2068
10d2cf94 2069 error = punch_hole(ip, newsize, 0);
ff8f33c8 2070 if (error == 0)
b3b94faa
DT
2071 error = trunc_end(ip);
2072
2073 return error;
2074}
2075
ff8f33c8 2076void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 2077{
ff8f33c8
SW
2078 int ret;
2079
8b5860a3 2080 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
2081 WARN_ON(ret != 0);
2082}
2083
/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	/* Unstuffing needs a new block, so take the quota and block
	   reservations before starting the transaction. */
	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

	/* Size the transaction for the dinode update, plus the extra
	   journaled-data and quota blocks only when they apply. */
	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (unstuff &&
				  gfs2_is_jdata(ip) ? RES_JDATA : 0) +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	truncate_setsize(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

	/* Unwind in reverse order of acquisition; note the nested
	   do_grow_qunlock label is only reachable when unstuff is set. */
do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}
2158
b3b94faa 2159/**
ff8f33c8
SW
2160 * gfs2_setattr_size - make a file a given size
2161 * @inode: the inode
2162 * @newsize: the size to make the file
b3b94faa 2163 *
ff8f33c8 2164 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2165 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2166 * in question.
b3b94faa
DT
2167 *
2168 * Returns: errno
2169 */
2170
ff8f33c8 2171int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2172{
af5c2697 2173 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2174 int ret;
b3b94faa 2175
ff8f33c8 2176 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2177
ff8f33c8
SW
2178 ret = inode_newsize_ok(inode, newsize);
2179 if (ret)
2180 return ret;
b3b94faa 2181
562c72aa
CH
2182 inode_dio_wait(inode);
2183
b54e9a0b 2184 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 2185 if (ret)
2b3dcf35 2186 goto out;
d2b47cfb 2187
8b5860a3 2188 if (newsize >= inode->i_size) {
2b3dcf35
BP
2189 ret = do_grow(inode, newsize);
2190 goto out;
2191 }
ff8f33c8 2192
8b5860a3 2193 ret = do_shrink(inode, newsize);
2b3dcf35 2194out:
a097dc7e 2195 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 2196 return ret;
b3b94faa
DT
2197}
2198
2199int gfs2_truncatei_resume(struct gfs2_inode *ip)
2200{
2201 int error;
10d2cf94 2202 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2203 if (!error)
2204 error = trunc_end(ip);
2205 return error;
2206}
2207
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	/* Deallocate the whole file: a hole from offset 0 of unbounded
	   length strips every data and metadata block. */
	return punch_hole(ip, 0, 0);
}
2212
b50f227b
SW
2213/**
2214 * gfs2_free_journal_extents - Free cached journal bmap info
2215 * @jd: The journal
2216 *
2217 */
2218
2219void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2220{
2221 struct gfs2_journal_extent *jext;
2222
2223 while(!list_empty(&jd->extent_list)) {
2224 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
2225 list_del(&jext->list);
2226 kfree(jext);
2227 }
2228}
2229
2230/**
2231 * gfs2_add_jextent - Add or merge a new extent to extent cache
2232 * @jd: The journal descriptor
2233 * @lblock: The logical block at start of new extent
c62baf65 2234 * @dblock: The physical block at start of new extent
b50f227b
SW
2235 * @blocks: Size of extent in fs blocks
2236 *
2237 * Returns: 0 on success or -ENOMEM
2238 */
2239
2240static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2241{
2242 struct gfs2_journal_extent *jext;
2243
2244 if (!list_empty(&jd->extent_list)) {
2245 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
2246 if ((jext->dblock + jext->blocks) == dblock) {
2247 jext->blocks += blocks;
2248 return 0;
2249 }
2250 }
2251
2252 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2253 if (jext == NULL)
2254 return -ENOMEM;
2255 jext->dblock = dblock;
2256 jext->lblock = lblock;
2257 jext->blocks = blocks;
2258 list_add_tail(&jext->list, &jd->extent_list);
2259 jd->nr_extents++;
2260 return 0;
2261}
2262
/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal. This will save
 * us time when writing journal blocks. Most journals will have only one
 * extent that maps all their logical blocks. That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential. Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out. But it's still possible. These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;	/* on-stack probe; only b_state/b_blocknr/b_size used */
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;
	ktime_t start, end;	/* for timing the mapping pass */

	start = ktime_get();
	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

	/* Walk the journal file front to back; each gfs2_block_map call
	   maps as large a contiguous run as it can (returned in b_size). */
	do {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		/* A journal must be fully allocated: an unmapped block
		   is an error, not a hole. */
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	end = ktime_get();
	fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
		jd->nr_extents, ktime_ms_delta(end, start));
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	/* Drop any partially built extent cache before failing. */
	gfs2_free_journal_extents(jd);
	return rc;
}
2330
/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 *
 * Returns: 1 if an alloc is required, 0 otherwise
 */

int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
			      unsigned int len)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head bh;	/* on-stack probe for gfs2_block_map */
	unsigned int shift;
	u64 lblock, lblock_stop, size;
	u64 end_of_file;

	if (!len)
		return 0;

	if (gfs2_is_stuffed(ip)) {
		/* A stuffed write needs allocation only when it would
		   overflow the space inside the dinode block. */
		if (offset + len > gfs2_max_stuffed_size(ip))
			return 1;
		return 0;
	}

	shift = sdp->sd_sb.sb_bsize_shift;
	BUG_ON(gfs2_is_dir(ip));
	/* Round the file size up to whole blocks; any write past this
	   point must allocate (except for the rindex, which may be
	   extended in place during resize). */
	end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
	lblock = offset >> shift;
	lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
	if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
		return 1;

	/* Within the file: scan the range; any unmapped block (a hole)
	   means the write will need an allocation. */
	size = (lblock_stop - lblock) << shift;
	do {
		bh.b_state = 0;
		bh.b_size = size;
		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
		if (!buffer_mapped(&bh))
			return 1;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	return 0;
}
2379
4e56a641
AG
2380static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2381{
2382 struct gfs2_inode *ip = GFS2_I(inode);
2383 struct buffer_head *dibh;
2384 int error;
2385
2386 if (offset >= inode->i_size)
2387 return 0;
2388 if (offset + length > inode->i_size)
2389 length = inode->i_size - offset;
2390
2391 error = gfs2_meta_inode_buffer(ip, &dibh);
2392 if (error)
2393 return error;
2394 gfs2_trans_add_meta(ip->i_gl, dibh);
2395 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2396 length);
2397 brelse(dibh);
2398 return 0;
2399}
2400
2401static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2402 loff_t length)
2403{
2404 struct gfs2_sbd *sdp = GFS2_SB(inode);
2405 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2406 int error;
2407
2408 while (length) {
2409 struct gfs2_trans *tr;
2410 loff_t chunk;
2411 unsigned int offs;
2412
2413 chunk = length;
2414 if (chunk > max_chunk)
2415 chunk = max_chunk;
2416
2417 offs = offset & ~PAGE_MASK;
2418 if (offs && chunk > PAGE_SIZE)
2419 chunk = offs + ((chunk - offs) & PAGE_MASK);
2420
2421 truncate_pagecache_range(inode, offset, chunk);
2422 offset += chunk;
2423 length -= chunk;
2424
2425 tr = current->journal_info;
2426 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2427 continue;
2428
2429 gfs2_trans_end(sdp);
2430 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2431 if (error)
2432 return error;
2433 }
2434 return 0;
2435}
2436
2437int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2438{
2439 struct inode *inode = file_inode(file);
2440 struct gfs2_inode *ip = GFS2_I(inode);
2441 struct gfs2_sbd *sdp = GFS2_SB(inode);
2442 int error;
2443
2444 if (gfs2_is_jdata(ip))
2445 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2446 GFS2_JTRUNC_REVOKES);
2447 else
2448 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2449 if (error)
2450 return error;
2451
2452 if (gfs2_is_stuffed(ip)) {
2453 error = stuffed_zero_range(inode, offset, length);
2454 if (error)
2455 goto out;
2456 } else {
00251a16 2457 unsigned int start_off, end_len, blocksize;
4e56a641
AG
2458
2459 blocksize = i_blocksize(inode);
2460 start_off = offset & (blocksize - 1);
00251a16 2461 end_len = (offset + length) & (blocksize - 1);
4e56a641
AG
2462 if (start_off) {
2463 unsigned int len = length;
2464 if (length > blocksize - start_off)
2465 len = blocksize - start_off;
2466 error = gfs2_block_zero_range(inode, offset, len);
2467 if (error)
2468 goto out;
2469 if (start_off + length < blocksize)
00251a16 2470 end_len = 0;
4e56a641 2471 }
00251a16 2472 if (end_len) {
4e56a641 2473 error = gfs2_block_zero_range(inode,
00251a16 2474 offset + length - end_len, end_len);
4e56a641
AG
2475 if (error)
2476 goto out;
2477 }
2478 }
2479
2480 if (gfs2_is_jdata(ip)) {
2481 BUG_ON(!current->journal_info);
2482 gfs2_journaled_truncate_range(inode, offset, length);
2483 } else
2484 truncate_pagecache_range(inode, offset, offset + length - 1);
2485
2486 file_update_time(file);
2487 mark_inode_dirty(inode);
2488
2489 if (current->journal_info)
2490 gfs2_trans_end(sdp);
2491
2492 if (!gfs2_is_stuffed(ip))
2493 error = punch_hole(ip, offset, length);
2494
2495out:
2496 if (current->journal_info)
2497 gfs2_trans_end(sdp);
2498 return error;
2499}