gfs2: dump fsid when dumping glock problems
[linux-2.6-block.git] / fs / gfs2 / bmap.c
CommitLineData
7336d0e6 1// SPDX-License-Identifier: GPL-2.0-only
b3b94faa
DT
2/*
3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
5 */
6
b3b94faa
DT
7#include <linux/spinlock.h>
8#include <linux/completion.h>
9#include <linux/buffer_head.h>
64dd153c 10#include <linux/blkdev.h>
5c676f6d 11#include <linux/gfs2_ondisk.h>
71b86f56 12#include <linux/crc32.h>
3974320c 13#include <linux/iomap.h>
98583b3e 14#include <linux/ktime.h>
b3b94faa
DT
15
16#include "gfs2.h"
5c676f6d 17#include "incore.h"
b3b94faa
DT
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
b3b94faa 21#include "meta_io.h"
b3b94faa
DT
22#include "quota.h"
23#include "rgrp.h"
45138990 24#include "log.h"
4c16c36a 25#include "super.h"
b3b94faa 26#include "trans.h"
18ec7d5c 27#include "dir.h"
5c676f6d 28#include "util.h"
64bc06bb 29#include "aops.h"
63997775 30#include "trace_gfs2.h"
b3b94faa
DT
31
32/* This doesn't need to be that large as max 64 bit pointers in a 4k
33 * block is 512, so __u16 is fine for that. It saves stack space to
34 * keep it small.
35 */
36struct metapath {
dbac6710 37 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
b3b94faa 38 __u16 mp_list[GFS2_MAX_META_HEIGHT];
5f8bd444
BP
39 int mp_fheight; /* find_metapath height */
40 int mp_aheight; /* actual height (lookup height) */
b3b94faa
DT
41};
42
64bc06bb
AG
43static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
44
f25ef0c1
SW
45/**
46 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
47 * @ip: the inode
48 * @dibh: the dinode buffer
49 * @block: the block number that was allocated
ff8f33c8 50 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
51 *
52 * Returns: errno
53 */
54
55static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
cd915493 56 u64 block, struct page *page)
f25ef0c1 57{
f25ef0c1
SW
58 struct inode *inode = &ip->i_inode;
59 struct buffer_head *bh;
60 int release = 0;
61
62 if (!page || page->index) {
220cca2a 63 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
f25ef0c1
SW
64 if (!page)
65 return -ENOMEM;
66 release = 1;
67 }
68
69 if (!PageUptodate(page)) {
70 void *kaddr = kmap(page);
602c89d2
SW
71 u64 dsize = i_size_read(inode);
72
235628c5
AG
73 if (dsize > gfs2_max_stuffed_size(ip))
74 dsize = gfs2_max_stuffed_size(ip);
f25ef0c1 75
602c89d2 76 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
09cbfeaf 77 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
f25ef0c1
SW
78 kunmap(page);
79
80 SetPageUptodate(page);
81 }
82
83 if (!page_has_buffers(page))
47a9a527
FF
84 create_empty_buffers(page, BIT(inode->i_blkbits),
85 BIT(BH_Uptodate));
f25ef0c1
SW
86
87 bh = page_buffers(page);
88
89 if (!buffer_mapped(bh))
90 map_bh(bh, inode->i_sb, block);
91
92 set_buffer_uptodate(bh);
845802b1 93 if (gfs2_is_jdata(ip))
350a9b0a 94 gfs2_trans_add_data(ip->i_gl, bh);
845802b1
AG
95 else {
96 mark_buffer_dirty(bh);
97 gfs2_ordered_add_inode(ip);
98 }
f25ef0c1
SW
99
100 if (release) {
101 unlock_page(page);
09cbfeaf 102 put_page(page);
f25ef0c1
SW
103 }
104
105 return 0;
106}
107
b3b94faa
DT
108/**
109 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
110 * @ip: The GFS2 inode to unstuff
ff8f33c8 111 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
112 *
113 * This routine unstuffs a dinode and returns it to a "normal" state such
114 * that the height can be grown in the traditional way.
115 *
116 * Returns: errno
117 */
118
f25ef0c1 119int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
b3b94faa
DT
120{
121 struct buffer_head *bh, *dibh;
48516ced 122 struct gfs2_dinode *di;
cd915493 123 u64 block = 0;
18ec7d5c 124 int isdir = gfs2_is_dir(ip);
b3b94faa
DT
125 int error;
126
127 down_write(&ip->i_rw_mutex);
128
129 error = gfs2_meta_inode_buffer(ip, &dibh);
130 if (error)
131 goto out;
907b9bce 132
a2e0f799 133 if (i_size_read(&ip->i_inode)) {
b3b94faa
DT
134 /* Get a free block, fill it with the stuffed data,
135 and write it out to disk */
136
b45e41d7 137 unsigned int n = 1;
6e87ed0f 138 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
09010978
SW
139 if (error)
140 goto out_brelse;
18ec7d5c 141 if (isdir) {
fbb27873 142 gfs2_trans_remove_revoke(GFS2_SB(&ip->i_inode), block, 1);
61e085a8 143 error = gfs2_dir_get_new_buffer(ip, block, &bh);
b3b94faa
DT
144 if (error)
145 goto out_brelse;
48516ced 146 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
b3b94faa
DT
147 dibh, sizeof(struct gfs2_dinode));
148 brelse(bh);
149 } else {
f25ef0c1 150 error = gfs2_unstuffer_page(ip, dibh, block, page);
b3b94faa
DT
151 if (error)
152 goto out_brelse;
153 }
154 }
155
156 /* Set up the pointer to the new block */
157
350a9b0a 158 gfs2_trans_add_meta(ip->i_gl, dibh);
48516ced 159 di = (struct gfs2_dinode *)dibh->b_data;
b3b94faa
DT
160 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
161
a2e0f799 162 if (i_size_read(&ip->i_inode)) {
48516ced 163 *(__be64 *)(di + 1) = cpu_to_be64(block);
77658aad
SW
164 gfs2_add_inode_blocks(&ip->i_inode, 1);
165 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
b3b94faa
DT
166 }
167
ecc30c79 168 ip->i_height = 1;
48516ced 169 di->di_height = cpu_to_be16(1);
b3b94faa 170
a91ea69f 171out_brelse:
b3b94faa 172 brelse(dibh);
a91ea69f 173out:
b3b94faa 174 up_write(&ip->i_rw_mutex);
b3b94faa
DT
175 return error;
176}
177
b3b94faa
DT
178
179/**
180 * find_metapath - Find path through the metadata tree
9b8c81d1 181 * @sdp: The superblock
b3b94faa 182 * @block: The disk block to look up
07e23d68 183 * @mp: The metapath to return the result in
9b8c81d1 184 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
185 *
186 * This routine returns a struct metapath structure that defines a path
187 * through the metadata of inode "ip" to get to block "block".
188 *
189 * Example:
190 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
191 * filesystem with a blocksize of 4096.
192 *
193 * find_metapath() would return a struct metapath structure set to:
07e23d68 194 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
195 *
196 * That means that in order to get to the block containing the byte at
197 * offset 101342453, we would load the indirect block pointed to by pointer
198 * 0 in the dinode. We would then load the indirect block pointed to by
199 * pointer 48 in that indirect block. We would then load the data block
200 * pointed to by pointer 165 in that indirect block.
201 *
202 * ----------------------------------------
203 * | Dinode | |
204 * | | 4|
205 * | |0 1 2 3 4 5 9|
206 * | | 6|
207 * ----------------------------------------
208 * |
209 * |
210 * V
211 * ----------------------------------------
212 * | Indirect Block |
213 * | 5|
214 * | 4 4 4 4 4 5 5 1|
215 * |0 5 6 7 8 9 0 1 2|
216 * ----------------------------------------
217 * |
218 * |
219 * V
220 * ----------------------------------------
221 * | Indirect Block |
222 * | 1 1 1 1 1 5|
223 * | 6 6 6 6 6 1|
224 * |0 3 4 5 6 7 2|
225 * ----------------------------------------
226 * |
227 * |
228 * V
229 * ----------------------------------------
230 * | Data block containing offset |
231 * | 101342453 |
232 * | |
233 * | |
234 * ----------------------------------------
235 *
236 */
237
9b8c81d1
SW
238static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
239 struct metapath *mp, unsigned int height)
b3b94faa 240{
b3b94faa
DT
241 unsigned int i;
242
5f8bd444 243 mp->mp_fheight = height;
9b8c81d1 244 for (i = height; i--;)
7eabb77e 245 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
246}
247
5af4e7a0 248static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 249{
5af4e7a0
BM
250 if (mp->mp_list[0] == 0)
251 return 2;
252 return 1;
9b8c81d1
SW
253}
254
d552a2b9 255/**
20cdc193 256 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
257 * @height: The metadata height (0 = dinode)
258 * @mp: The metapath
259 */
260static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
261{
262 struct buffer_head *bh = mp->mp_bh[height];
263 if (height == 0)
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
265 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
266}
267
b3b94faa
DT
268/**
269 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
270 * @height: The metadata height (0 = dinode)
271 * @mp: The metapath
272 *
273 * Return a pointer to the block number of the next height of the metadata
274 * tree given a buffer containing the pointer to the current height of the
275 * metadata tree.
276 */
277
9b8c81d1 278static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 279{
d552a2b9
BP
280 __be64 *p = metaptr1(height, mp);
281 return p + mp->mp_list[height];
b3b94faa
DT
282}
283
7841b9f0
AG
284static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
285{
286 const struct buffer_head *bh = mp->mp_bh[height];
287 return (const __be64 *)(bh->b_data + bh->b_size);
288}
289
290static void clone_metapath(struct metapath *clone, struct metapath *mp)
291{
292 unsigned int hgt;
293
294 *clone = *mp;
295 for (hgt = 0; hgt < mp->mp_aheight; hgt++)
296 get_bh(clone->mp_bh[hgt]);
297}
298
5cf26b1e 299static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
b99b98dc 300{
b99b98dc
SW
301 const __be64 *t;
302
5cf26b1e 303 for (t = start; t < end; t++) {
c3ce5aa9
AG
304 struct buffer_head *rabh;
305
b99b98dc
SW
306 if (!*t)
307 continue;
308
309 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
310 if (trylock_buffer(rabh)) {
311 if (!buffer_uptodate(rabh)) {
312 rabh->b_end_io = end_buffer_read_sync;
e477b24b
CL
313 submit_bh(REQ_OP_READ,
314 REQ_RAHEAD | REQ_META | REQ_PRIO,
315 rabh);
b99b98dc
SW
316 continue;
317 }
318 unlock_buffer(rabh);
319 }
320 brelse(rabh);
321 }
322}
323
e8b43fe0
AG
324static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
325 unsigned int x, unsigned int h)
d552a2b9 326{
e8b43fe0
AG
327 for (; x < h; x++) {
328 __be64 *ptr = metapointer(x, mp);
329 u64 dblock = be64_to_cpu(*ptr);
330 int ret;
d552a2b9 331
e8b43fe0
AG
332 if (!dblock)
333 break;
334 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
335 if (ret)
336 return ret;
337 }
338 mp->mp_aheight = x + 1;
339 return 0;
d552a2b9
BP
340}
341
b3b94faa 342/**
9b8c81d1
SW
343 * lookup_metapath - Walk the metadata tree to a specific point
344 * @ip: The inode
b3b94faa 345 * @mp: The metapath
b3b94faa 346 *
9b8c81d1
SW
347 * Assumes that the inode's buffer has already been looked up and
348 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
349 * by find_metapath().
350 *
351 * If this function encounters part of the tree which has not been
352 * allocated, it returns the current height of the tree at the point
353 * at which it found the unallocated block. Blocks which are found are
354 * added to the mp->mp_bh[] list.
b3b94faa 355 *
e8b43fe0 356 * Returns: error
b3b94faa
DT
357 */
358
9b8c81d1 359static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
11707ea0 360{
e8b43fe0 361 return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
dbac6710
SW
362}
363
d552a2b9
BP
364/**
365 * fillup_metapath - fill up buffers for the metadata path to a specific height
366 * @ip: The inode
367 * @mp: The metapath
368 * @h: The height to which it should be mapped
369 *
370 * Similar to lookup_metapath, but does lookups for a range of heights
371 *
c3ce5aa9 372 * Returns: error or the number of buffers filled
d552a2b9
BP
373 */
374
375static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
376{
e8b43fe0 377 unsigned int x = 0;
c3ce5aa9 378 int ret;
d552a2b9
BP
379
380 if (h) {
381 /* find the first buffer we need to look up. */
e8b43fe0
AG
382 for (x = h - 1; x > 0; x--) {
383 if (mp->mp_bh[x])
384 break;
d552a2b9
BP
385 }
386 }
c3ce5aa9
AG
387 ret = __fillup_metapath(ip, mp, x, h);
388 if (ret)
389 return ret;
390 return mp->mp_aheight - x - 1;
d552a2b9
BP
391}
392
64bc06bb 393static void release_metapath(struct metapath *mp)
dbac6710
SW
394{
395 int i;
396
9b8c81d1
SW
397 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
398 if (mp->mp_bh[i] == NULL)
399 break;
400 brelse(mp->mp_bh[i]);
64bc06bb 401 mp->mp_bh[i] = NULL;
9b8c81d1 402 }
11707ea0
SW
403}
404
30cbf189
SW
405/**
406 * gfs2_extent_length - Returns length of an extent of blocks
bcfe9413
AG
407 * @bh: The metadata block
408 * @ptr: Current position in @bh
409 * @limit: Max extent length to return
30cbf189
SW
410 * @eob: Set to 1 if we hit "end of block"
411 *
30cbf189
SW
412 * Returns: The length of the extent (minimum of one block)
413 */
414
bcfe9413 415static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob)
30cbf189 416{
bcfe9413 417 const __be64 *end = (__be64 *)(bh->b_data + bh->b_size);
30cbf189
SW
418 const __be64 *first = ptr;
419 u64 d = be64_to_cpu(*ptr);
420
421 *eob = 0;
422 do {
423 ptr++;
424 if (ptr >= end)
425 break;
bcfe9413 426 d++;
30cbf189
SW
427 } while(be64_to_cpu(*ptr) == d);
428 if (ptr >= end)
429 *eob = 1;
bcfe9413 430 return ptr - first;
30cbf189
SW
431}
432
7841b9f0
AG
433typedef const __be64 *(*gfs2_metadata_walker)(
434 struct metapath *mp,
435 const __be64 *start, const __be64 *end,
436 u64 factor, void *data);
437
438#define WALK_STOP ((__be64 *)0)
439#define WALK_NEXT ((__be64 *)1)
440
441static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
442 u64 len, struct metapath *mp, gfs2_metadata_walker walker,
443 void *data)
444{
445 struct metapath clone;
446 struct gfs2_inode *ip = GFS2_I(inode);
447 struct gfs2_sbd *sdp = GFS2_SB(inode);
448 const __be64 *start, *end, *ptr;
449 u64 factor = 1;
450 unsigned int hgt;
451 int ret = 0;
452
453 for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
454 factor *= sdp->sd_inptrs;
455
456 for (;;) {
457 u64 step;
458
459 /* Walk indirect block. */
460 start = metapointer(hgt, mp);
461 end = metaend(hgt, mp);
462
463 step = (end - start) * factor;
464 if (step > len)
465 end = start + DIV_ROUND_UP_ULL(len, factor);
466
467 ptr = walker(mp, start, end, factor, data);
468 if (ptr == WALK_STOP)
469 break;
470 if (step >= len)
471 break;
472 len -= step;
473 if (ptr != WALK_NEXT) {
474 BUG_ON(!*ptr);
475 mp->mp_list[hgt] += ptr - start;
476 goto fill_up_metapath;
477 }
478
479lower_metapath:
480 /* Decrease height of metapath. */
481 if (mp != &clone) {
482 clone_metapath(&clone, mp);
483 mp = &clone;
484 }
485 brelse(mp->mp_bh[hgt]);
486 mp->mp_bh[hgt] = NULL;
487 if (!hgt)
488 break;
489 hgt--;
490 factor *= sdp->sd_inptrs;
491
492 /* Advance in metadata tree. */
493 (mp->mp_list[hgt])++;
494 start = metapointer(hgt, mp);
495 end = metaend(hgt, mp);
496 if (start >= end) {
497 mp->mp_list[hgt] = 0;
498 if (!hgt)
499 break;
500 goto lower_metapath;
501 }
502
503fill_up_metapath:
504 /* Increase height of metapath. */
505 if (mp != &clone) {
506 clone_metapath(&clone, mp);
507 mp = &clone;
508 }
509 ret = fillup_metapath(ip, mp, ip->i_height - 1);
510 if (ret < 0)
511 break;
512 hgt += ret;
513 for (; ret; ret--)
514 do_div(factor, sdp->sd_inptrs);
515 mp->mp_aheight = hgt + 1;
516 }
517 if (mp == &clone)
518 release_metapath(mp);
519 return ret;
520}
521
522struct gfs2_hole_walker_args {
523 u64 blocks;
524};
525
526static const __be64 *gfs2_hole_walker(struct metapath *mp,
527 const __be64 *start, const __be64 *end,
528 u64 factor, void *data)
529{
530 struct gfs2_hole_walker_args *args = data;
531 const __be64 *ptr;
532
533 for (ptr = start; ptr < end; ptr++) {
534 if (*ptr) {
535 args->blocks += (ptr - start) * factor;
536 if (mp->mp_aheight == mp->mp_fheight)
537 return WALK_STOP;
538 return ptr; /* increase height */
539 }
540 }
541 args->blocks += (end - start) * factor;
542 return WALK_NEXT;
543}
544
545/**
546 * gfs2_hole_size - figure out the size of a hole
547 * @inode: The inode
548 * @lblock: The logical starting block number
549 * @len: How far to look (in blocks)
550 * @mp: The metapath at lblock
551 * @iomap: The iomap to store the hole size in
552 *
553 * This function modifies @mp.
554 *
555 * Returns: errno on error
556 */
557static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
558 struct metapath *mp, struct iomap *iomap)
559{
560 struct gfs2_hole_walker_args args = { };
561 int ret = 0;
562
563 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
564 if (!ret)
565 iomap->length = args.blocks << inode->i_blkbits;
566 return ret;
567}
568
9b8c81d1
SW
569static inline __be64 *gfs2_indirect_init(struct metapath *mp,
570 struct gfs2_glock *gl, unsigned int i,
571 unsigned offset, u64 bn)
572{
573 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
574 ((i > 1) ? sizeof(struct gfs2_meta_header) :
575 sizeof(struct gfs2_dinode)));
576 BUG_ON(i < 1);
577 BUG_ON(mp->mp_bh[i] != NULL);
578 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
350a9b0a 579 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
9b8c81d1
SW
580 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
581 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
582 ptr += offset;
583 *ptr = cpu_to_be64(bn);
584 return ptr;
585}
586
/* States of the allocation state machine in gfs2_iomap_alloc() */
enum alloc_state {
	ALLOC_DATA = 0,		/* tree complete, adding data blocks */
	ALLOC_GROW_DEPTH = 1,	/* branching from the existing tree */
	ALLOC_GROW_HEIGHT = 2,	/* growing the height of the tree */
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
593
594/**
628e366d 595 * gfs2_iomap_alloc - Build a metadata tree of the requested height
9b8c81d1 596 * @inode: The GFS2 inode
628e366d
AG
597 * @iomap: The iomap structure
598 * @flags: iomap flags
5f8bd444 599 * @mp: The metapath, with proper height information calculated
9b8c81d1
SW
600 *
601 * In this routine we may have to alloc:
602 * i) Indirect blocks to grow the metadata tree height
603 * ii) Indirect blocks to fill in lower part of the metadata tree
604 * iii) Data blocks
605 *
64bc06bb
AG
606 * This function is called after gfs2_iomap_get, which works out the
607 * total number of blocks which we need via gfs2_alloc_size.
608 *
609 * We then do the actual allocation asking for an extent at a time (if
610 * enough contiguous free blocks are available, there will only be one
611 * allocation request per call) and uses the state machine to initialise
612 * the blocks in order.
9b8c81d1 613 *
628e366d
AG
614 * Right now, this function will allocate at most one indirect block
615 * worth of data -- with a default block size of 4K, that's slightly
616 * less than 2M. If this limitation is ever removed to allow huge
617 * allocations, we would probably still want to limit the iomap size we
618 * return to avoid stalling other tasks during huge writes; the next
619 * iomap iteration would then find the blocks already allocated.
620 *
9b8c81d1
SW
621 * Returns: errno on error
622 */
623
3974320c
BP
624static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
625 unsigned flags, struct metapath *mp)
9b8c81d1
SW
626{
627 struct gfs2_inode *ip = GFS2_I(inode);
628 struct gfs2_sbd *sdp = GFS2_SB(inode);
629 struct buffer_head *dibh = mp->mp_bh[0];
5f8bd444 630 u64 bn;
5af4e7a0 631 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
64bc06bb 632 size_t dblks = iomap->length >> inode->i_blkbits;
5f8bd444 633 const unsigned end_of_metadata = mp->mp_fheight - 1;
628e366d 634 int ret;
9b8c81d1
SW
635 enum alloc_state state;
636 __be64 *ptr;
637 __be64 zero_bn = 0;
638
5f8bd444 639 BUG_ON(mp->mp_aheight < 1);
9b8c81d1 640 BUG_ON(dibh == NULL);
64bc06bb 641 BUG_ON(dblks < 1);
9b8c81d1 642
350a9b0a 643 gfs2_trans_add_meta(ip->i_gl, dibh);
9b8c81d1 644
628e366d
AG
645 down_write(&ip->i_rw_mutex);
646
5f8bd444 647 if (mp->mp_fheight == mp->mp_aheight) {
64bc06bb 648 /* Bottom indirect block exists */
9b8c81d1
SW
649 state = ALLOC_DATA;
650 } else {
651 /* Need to allocate indirect blocks */
5f8bd444 652 if (mp->mp_fheight == ip->i_height) {
9b8c81d1 653 /* Writing into existing tree, extend tree down */
5f8bd444 654 iblks = mp->mp_fheight - mp->mp_aheight;
9b8c81d1
SW
655 state = ALLOC_GROW_DEPTH;
656 } else {
657 /* Building up tree height */
658 state = ALLOC_GROW_HEIGHT;
5f8bd444 659 iblks = mp->mp_fheight - ip->i_height;
5af4e7a0 660 branch_start = metapath_branch_start(mp);
5f8bd444 661 iblks += (mp->mp_fheight - branch_start);
9b8c81d1
SW
662 }
663 }
664
665 /* start of the second part of the function (state machine) */
666
3974320c 667 blks = dblks + iblks;
5f8bd444 668 i = mp->mp_aheight;
9b8c81d1
SW
669 do {
670 n = blks - alloced;
628e366d
AG
671 ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
672 if (ret)
673 goto out;
9b8c81d1
SW
674 alloced += n;
675 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
fbb27873 676 gfs2_trans_remove_revoke(sdp, bn, n);
9b8c81d1
SW
677 switch (state) {
678 /* Growing height of tree */
679 case ALLOC_GROW_HEIGHT:
680 if (i == 1) {
681 ptr = (__be64 *)(dibh->b_data +
682 sizeof(struct gfs2_dinode));
683 zero_bn = *ptr;
684 }
5f8bd444
BP
685 for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
686 i++, n--)
9b8c81d1 687 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
5f8bd444 688 if (i - 1 == mp->mp_fheight - ip->i_height) {
9b8c81d1
SW
689 i--;
690 gfs2_buffer_copy_tail(mp->mp_bh[i],
691 sizeof(struct gfs2_meta_header),
692 dibh, sizeof(struct gfs2_dinode));
693 gfs2_buffer_clear_tail(dibh,
694 sizeof(struct gfs2_dinode) +
695 sizeof(__be64));
696 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
697 sizeof(struct gfs2_meta_header));
698 *ptr = zero_bn;
699 state = ALLOC_GROW_DEPTH;
5f8bd444 700 for(i = branch_start; i < mp->mp_fheight; i++) {
9b8c81d1
SW
701 if (mp->mp_bh[i] == NULL)
702 break;
703 brelse(mp->mp_bh[i]);
704 mp->mp_bh[i] = NULL;
705 }
5af4e7a0 706 i = branch_start;
9b8c81d1
SW
707 }
708 if (n == 0)
709 break;
0a4c9265 710 /* fall through - To branching from existing tree */
9b8c81d1 711 case ALLOC_GROW_DEPTH:
5f8bd444 712 if (i > 1 && i < mp->mp_fheight)
350a9b0a 713 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
5f8bd444 714 for (; i < mp->mp_fheight && n > 0; i++, n--)
9b8c81d1
SW
715 gfs2_indirect_init(mp, ip->i_gl, i,
716 mp->mp_list[i-1], bn++);
5f8bd444 717 if (i == mp->mp_fheight)
9b8c81d1
SW
718 state = ALLOC_DATA;
719 if (n == 0)
720 break;
0a4c9265 721 /* fall through - To tree complete, adding data blocks */
9b8c81d1 722 case ALLOC_DATA:
3974320c 723 BUG_ON(n > dblks);
9b8c81d1 724 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
350a9b0a 725 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
3974320c 726 dblks = n;
9b8c81d1 727 ptr = metapointer(end_of_metadata, mp);
3974320c 728 iomap->addr = bn << inode->i_blkbits;
628e366d 729 iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
9b8c81d1
SW
730 while (n-- > 0)
731 *ptr++ = cpu_to_be64(bn++);
732 break;
733 }
3974320c 734 } while (iomap->addr == IOMAP_NULL_ADDR);
9b8c81d1 735
d505a96a 736 iomap->type = IOMAP_MAPPED;
3974320c 737 iomap->length = (u64)dblks << inode->i_blkbits;
5f8bd444 738 ip->i_height = mp->mp_fheight;
9b8c81d1 739 gfs2_add_inode_blocks(&ip->i_inode, alloced);
628e366d
AG
740 gfs2_dinode_out(ip, dibh->b_data);
741out:
742 up_write(&ip->i_rw_mutex);
743 return ret;
9b8c81d1
SW
744}
745
7ee66c03
CH
/* Private iomap flag; gfs2_iomap_get() sets it when an extent ends at the end of its metadata block */
#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
747
64bc06bb
AG
748/**
749 * gfs2_alloc_size - Compute the maximum allocation size
750 * @inode: The inode
751 * @mp: The metapath
752 * @size: Requested size in blocks
753 *
754 * Compute the maximum size of the next allocation at @mp.
755 *
756 * Returns: size in blocks
757 */
758static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size)
3974320c
BP
759{
760 struct gfs2_inode *ip = GFS2_I(inode);
64bc06bb
AG
761 struct gfs2_sbd *sdp = GFS2_SB(inode);
762 const __be64 *first, *ptr, *end;
763
764 /*
765 * For writes to stuffed files, this function is called twice via
766 * gfs2_iomap_get, before and after unstuffing. The size we return the
767 * first time needs to be large enough to get the reservation and
768 * allocation sizes right. The size we return the second time must
769 * be exact or else gfs2_iomap_alloc won't do the right thing.
770 */
771
772 if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) {
773 unsigned int maxsize = mp->mp_fheight > 1 ?
774 sdp->sd_inptrs : sdp->sd_diptrs;
775 maxsize -= mp->mp_list[mp->mp_fheight - 1];
776 if (size > maxsize)
777 size = maxsize;
778 return size;
779 }
3974320c 780
64bc06bb
AG
781 first = metapointer(ip->i_height - 1, mp);
782 end = metaend(ip->i_height - 1, mp);
783 if (end - first > size)
784 end = first + size;
785 for (ptr = first; ptr < end; ptr++) {
786 if (*ptr)
787 break;
788 }
789 return ptr - first;
3974320c
BP
790}
791
792/**
628e366d 793 * gfs2_iomap_get - Map blocks from an inode to disk blocks
3974320c
BP
794 * @inode: The inode
795 * @pos: Starting position in bytes
796 * @length: Length to map, in bytes
797 * @flags: iomap flags
798 * @iomap: The iomap structure
628e366d 799 * @mp: The metapath
3974320c
BP
800 *
801 * Returns: errno
802 */
628e366d
AG
803static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
804 unsigned flags, struct iomap *iomap,
805 struct metapath *mp)
b3b94faa 806{
feaa7bba
SW
807 struct gfs2_inode *ip = GFS2_I(inode);
808 struct gfs2_sbd *sdp = GFS2_SB(inode);
d505a96a 809 loff_t size = i_size_read(inode);
9b8c81d1 810 __be64 *ptr;
3974320c 811 sector_t lblock;
628e366d
AG
812 sector_t lblock_stop;
813 int ret;
9b8c81d1 814 int eob;
628e366d 815 u64 len;
d505a96a 816 struct buffer_head *dibh = NULL, *bh;
9b8c81d1 817 u8 height;
7276b3b0 818
628e366d
AG
819 if (!length)
820 return -EINVAL;
b3b94faa 821
d505a96a
AG
822 down_read(&ip->i_rw_mutex);
823
824 ret = gfs2_meta_inode_buffer(ip, &dibh);
825 if (ret)
826 goto unlock;
c26b5aa8 827 mp->mp_bh[0] = dibh;
d505a96a 828
49edd5bf 829 if (gfs2_is_stuffed(ip)) {
d505a96a
AG
830 if (flags & IOMAP_WRITE) {
831 loff_t max_size = gfs2_max_stuffed_size(ip);
832
833 if (pos + length > max_size)
834 goto unstuff;
835 iomap->length = max_size;
836 } else {
837 if (pos >= size) {
838 if (flags & IOMAP_REPORT) {
839 ret = -ENOENT;
840 goto unlock;
841 } else {
842 /* report a hole */
843 iomap->offset = pos;
844 iomap->length = length;
845 goto do_alloc;
846 }
847 }
848 iomap->length = size;
49edd5bf 849 }
d505a96a
AG
850 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
851 sizeof(struct gfs2_dinode);
852 iomap->type = IOMAP_INLINE;
64bc06bb 853 iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode);
d505a96a 854 goto out;
3974320c 855 }
d505a96a
AG
856
857unstuff:
3974320c 858 lblock = pos >> inode->i_blkbits;
3974320c 859 iomap->offset = lblock << inode->i_blkbits;
628e366d
AG
860 lblock_stop = (pos + length - 1) >> inode->i_blkbits;
861 len = lblock_stop - lblock + 1;
d505a96a 862 iomap->length = len << inode->i_blkbits;
628e366d 863
9b8c81d1 864 height = ip->i_height;
9a38662b 865 while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
9b8c81d1 866 height++;
628e366d 867 find_metapath(sdp, lblock, mp, height);
9b8c81d1
SW
868 if (height > ip->i_height || gfs2_is_stuffed(ip))
869 goto do_alloc;
3974320c 870
628e366d 871 ret = lookup_metapath(ip, mp);
e8b43fe0 872 if (ret)
628e366d 873 goto unlock;
3974320c 874
628e366d 875 if (mp->mp_aheight != ip->i_height)
9b8c81d1 876 goto do_alloc;
3974320c 877
628e366d 878 ptr = metapointer(ip->i_height - 1, mp);
9b8c81d1
SW
879 if (*ptr == 0)
880 goto do_alloc;
3974320c 881
628e366d 882 bh = mp->mp_bh[ip->i_height - 1];
bcfe9413 883 len = gfs2_extent_length(bh, ptr, len, &eob);
3974320c 884
628e366d
AG
885 iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
886 iomap->length = len << inode->i_blkbits;
887 iomap->type = IOMAP_MAPPED;
0ed91eca 888 iomap->flags |= IOMAP_F_MERGED;
9b8c81d1 889 if (eob)
7ee66c03 890 iomap->flags |= IOMAP_F_GFS2_BOUNDARY;
3974320c 891
3974320c 892out:
628e366d
AG
893 iomap->bdev = inode->i_sb->s_bdev;
894unlock:
895 up_read(&ip->i_rw_mutex);
9b8c81d1 896 return ret;
30cbf189 897
9b8c81d1 898do_alloc:
628e366d 899 iomap->addr = IOMAP_NULL_ADDR;
628e366d 900 iomap->type = IOMAP_HOLE;
628e366d 901 if (flags & IOMAP_REPORT) {
49edd5bf 902 if (pos >= size)
3974320c 903 ret = -ENOENT;
628e366d
AG
904 else if (height == ip->i_height)
905 ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
49edd5bf
AG
906 else
907 iomap->length = size - pos;
64bc06bb
AG
908 } else if (flags & IOMAP_WRITE) {
909 u64 alloc_size;
910
967bcc91
AG
911 if (flags & IOMAP_DIRECT)
912 goto out; /* (see gfs2_file_direct_write) */
913
64bc06bb
AG
914 len = gfs2_alloc_size(inode, mp, len);
915 alloc_size = len << inode->i_blkbits;
916 if (alloc_size < iomap->length)
917 iomap->length = alloc_size;
918 } else {
d505a96a
AG
919 if (pos < size && height == ip->i_height)
920 ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
b3b94faa 921 }
628e366d 922 goto out;
3974320c
BP
923}
924
7c70b896
BP
925/**
926 * gfs2_lblk_to_dblk - convert logical block to disk block
927 * @inode: the inode of the file we're mapping
928 * @lblock: the block relative to the start of the file
929 * @dblock: the returned dblock, if no error
930 *
931 * This function maps a single block from a file logical block (relative to
932 * the start of the file) to a file system absolute block using iomap.
933 *
934 * Returns: the absolute file system block, or an error
935 */
936int gfs2_lblk_to_dblk(struct inode *inode, u32 lblock, u64 *dblock)
937{
938 struct iomap iomap = { };
939 struct metapath mp = { .mp_aheight = 1, };
940 loff_t pos = (loff_t)lblock << inode->i_blkbits;
941 int ret;
942
943 ret = gfs2_iomap_get(inode, pos, i_blocksize(inode), 0, &iomap, &mp);
944 release_metapath(&mp);
945 if (ret == 0)
946 *dblock = iomap.addr >> inode->i_blkbits;
947
948 return ret;
949}
950
64bc06bb
AG
951static int gfs2_write_lock(struct inode *inode)
952{
953 struct gfs2_inode *ip = GFS2_I(inode);
954 struct gfs2_sbd *sdp = GFS2_SB(inode);
955 int error;
956
957 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
958 error = gfs2_glock_nq(&ip->i_gh);
959 if (error)
960 goto out_uninit;
961 if (&ip->i_inode == sdp->sd_rindex) {
962 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
963
964 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
965 GL_NOCACHE, &m_ip->i_gh);
966 if (error)
967 goto out_unlock;
968 }
969 return 0;
970
971out_unlock:
972 gfs2_glock_dq(&ip->i_gh);
973out_uninit:
974 gfs2_holder_uninit(&ip->i_gh);
975 return error;
976}
977
978static void gfs2_write_unlock(struct inode *inode)
979{
980 struct gfs2_inode *ip = GFS2_I(inode);
981 struct gfs2_sbd *sdp = GFS2_SB(inode);
982
983 if (&ip->i_inode == sdp->sd_rindex) {
984 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
985
986 gfs2_glock_dq_uninit(&m_ip->i_gh);
987 }
988 gfs2_glock_dq_uninit(&ip->i_gh);
989}
990
d0a22a4b
AG
991static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
992 unsigned len, struct iomap *iomap)
993{
2741b672 994 unsigned int blockmask = i_blocksize(inode) - 1;
d0a22a4b 995 struct gfs2_sbd *sdp = GFS2_SB(inode);
2741b672 996 unsigned int blocks;
d0a22a4b 997
2741b672
AG
998 blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
999 return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
d0a22a4b
AG
1000}
1001
df0db3ec
AG
1002static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
1003 unsigned copied, struct page *page,
1004 struct iomap *iomap)
64bc06bb
AG
1005{
1006 struct gfs2_inode *ip = GFS2_I(inode);
d0a22a4b 1007 struct gfs2_sbd *sdp = GFS2_SB(inode);
64bc06bb 1008
d0a22a4b 1009 if (page && !gfs2_is_stuffed(ip))
df0db3ec 1010 gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
d0a22a4b 1011 gfs2_trans_end(sdp);
64bc06bb
AG
1012}
1013
df0db3ec 1014static const struct iomap_page_ops gfs2_iomap_page_ops = {
d0a22a4b 1015 .page_prepare = gfs2_iomap_page_prepare,
df0db3ec
AG
1016 .page_done = gfs2_iomap_page_done,
1017};
1018
64bc06bb
AG
1019static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
1020 loff_t length, unsigned flags,
c26b5aa8
AG
1021 struct iomap *iomap,
1022 struct metapath *mp)
64bc06bb 1023{
64bc06bb
AG
1024 struct gfs2_inode *ip = GFS2_I(inode);
1025 struct gfs2_sbd *sdp = GFS2_SB(inode);
1026 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
1027 bool unstuff, alloc_required;
1028 int ret;
1029
1030 ret = gfs2_write_lock(inode);
1031 if (ret)
1032 return ret;
1033
1034 unstuff = gfs2_is_stuffed(ip) &&
1035 pos + length > gfs2_max_stuffed_size(ip);
1036
c26b5aa8 1037 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
64bc06bb 1038 if (ret)
c26b5aa8 1039 goto out_unlock;
64bc06bb
AG
1040
1041 alloc_required = unstuff || iomap->type == IOMAP_HOLE;
1042
1043 if (alloc_required || gfs2_is_jdata(ip))
1044 gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
1045 &ind_blocks);
1046
1047 if (alloc_required) {
1048 struct gfs2_alloc_parms ap = {
1049 .target = data_blocks + ind_blocks
1050 };
1051
1052 ret = gfs2_quota_lock_check(ip, &ap);
1053 if (ret)
c26b5aa8 1054 goto out_unlock;
64bc06bb
AG
1055
1056 ret = gfs2_inplace_reserve(ip, &ap);
1057 if (ret)
1058 goto out_qunlock;
1059 }
1060
1061 rblocks = RES_DINODE + ind_blocks;
1062 if (gfs2_is_jdata(ip))
1063 rblocks += data_blocks;
1064 if (ind_blocks || data_blocks)
1065 rblocks += RES_STATFS + RES_QUOTA;
1066 if (inode == sdp->sd_rindex)
1067 rblocks += 2 * RES_STATFS;
1068 if (alloc_required)
1069 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
1070
d0a22a4b
AG
1071 if (unstuff || iomap->type == IOMAP_HOLE) {
1072 struct gfs2_trans *tr;
64bc06bb 1073
d0a22a4b
AG
1074 ret = gfs2_trans_begin(sdp, rblocks,
1075 iomap->length >> inode->i_blkbits);
64bc06bb 1076 if (ret)
d0a22a4b
AG
1077 goto out_trans_fail;
1078
1079 if (unstuff) {
1080 ret = gfs2_unstuff_dinode(ip, NULL);
1081 if (ret)
1082 goto out_trans_end;
1083 release_metapath(mp);
1084 ret = gfs2_iomap_get(inode, iomap->offset,
1085 iomap->length, flags, iomap, mp);
1086 if (ret)
1087 goto out_trans_end;
1088 }
64bc06bb 1089
d0a22a4b
AG
1090 if (iomap->type == IOMAP_HOLE) {
1091 ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
1092 if (ret) {
1093 gfs2_trans_end(sdp);
1094 gfs2_inplace_release(ip);
1095 punch_hole(ip, iomap->offset, iomap->length);
1096 goto out_qunlock;
1097 }
64bc06bb 1098 }
d0a22a4b
AG
1099
1100 tr = current->journal_info;
1101 if (tr->tr_num_buf_new)
1102 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1103 else
1104 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]);
1105
1106 gfs2_trans_end(sdp);
64bc06bb 1107 }
d0a22a4b
AG
1108
1109 if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
df0db3ec 1110 iomap->page_ops = &gfs2_iomap_page_ops;
64bc06bb
AG
1111 return 0;
1112
1113out_trans_end:
1114 gfs2_trans_end(sdp);
1115out_trans_fail:
1116 if (alloc_required)
1117 gfs2_inplace_release(ip);
1118out_qunlock:
1119 if (alloc_required)
1120 gfs2_quota_unlock(ip);
c26b5aa8 1121out_unlock:
64bc06bb
AG
1122 gfs2_write_unlock(inode);
1123 return ret;
1124}
1125
628e366d
AG
1126static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1127 unsigned flags, struct iomap *iomap)
1128{
1129 struct gfs2_inode *ip = GFS2_I(inode);
1130 struct metapath mp = { .mp_aheight = 1, };
1131 int ret;
1132
0ed91eca
AG
1133 iomap->flags |= IOMAP_F_BUFFER_HEAD;
1134
628e366d 1135 trace_gfs2_iomap_start(ip, pos, length, flags);
967bcc91 1136 if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
c26b5aa8 1137 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
628e366d
AG
1138 } else {
1139 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
c26b5aa8 1140
967bcc91
AG
1141 /*
1142 * Silently fall back to buffered I/O for stuffed files or if
1143 * we've hot a hole (see gfs2_file_direct_write).
1144 */
1145 if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
1146 iomap->type != IOMAP_MAPPED)
1147 ret = -ENOTBLK;
628e366d 1148 }
c26b5aa8 1149 release_metapath(&mp);
628e366d
AG
1150 trace_gfs2_iomap_end(ip, iomap, ret);
1151 return ret;
1152}
1153
64bc06bb
AG
1154static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1155 ssize_t written, unsigned flags, struct iomap *iomap)
1156{
1157 struct gfs2_inode *ip = GFS2_I(inode);
1158 struct gfs2_sbd *sdp = GFS2_SB(inode);
64bc06bb 1159
967bcc91 1160 if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
64bc06bb
AG
1161 goto out;
1162
d0a22a4b 1163 if (!gfs2_is_stuffed(ip))
64bc06bb
AG
1164 gfs2_ordered_add_inode(ip);
1165
d0a22a4b 1166 if (inode == sdp->sd_rindex)
64bc06bb 1167 adjust_fs_space(inode);
64bc06bb 1168
64bc06bb
AG
1169 gfs2_inplace_release(ip);
1170
1171 if (length != written && (iomap->flags & IOMAP_F_NEW)) {
1172 /* Deallocate blocks that were just allocated. */
1173 loff_t blockmask = i_blocksize(inode) - 1;
1174 loff_t end = (pos + length) & ~blockmask;
1175
1176 pos = (pos + written + blockmask) & ~blockmask;
1177 if (pos < end) {
1178 truncate_pagecache_range(inode, pos, end - 1);
1179 punch_hole(ip, pos, end - pos);
1180 }
1181 }
1182
1183 if (ip->i_qadata && ip->i_qadata->qa_qd_num)
1184 gfs2_quota_unlock(ip);
1185 gfs2_write_unlock(inode);
1186
1187out:
64bc06bb
AG
1188 return 0;
1189}
1190
628e366d
AG
1191const struct iomap_ops gfs2_iomap_ops = {
1192 .iomap_begin = gfs2_iomap_begin,
64bc06bb 1193 .iomap_end = gfs2_iomap_end,
628e366d
AG
1194};
1195
3974320c 1196/**
d39d18e0 1197 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
1198 * @inode: The inode
1199 * @lblock: The logical block number
1200 * @bh_map: The bh to be mapped
1201 * @create: True if its ok to alloc blocks to satify the request
1202 *
d39d18e0
AG
1203 * The size of the requested mapping is defined in bh_map->b_size.
1204 *
1205 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
1206 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
1207 * bh_map->b_size to indicate the size of the mapping when @lblock and
1208 * successive blocks are mapped, up to the requested size.
1209 *
1210 * Sets buffer_boundary() if a read of metadata will be required
1211 * before the next block can be mapped. Sets buffer_new() if new
1212 * blocks were allocated.
3974320c
BP
1213 *
1214 * Returns: errno
1215 */
1216
1217int gfs2_block_map(struct inode *inode, sector_t lblock,
1218 struct buffer_head *bh_map, int create)
1219{
1220 struct gfs2_inode *ip = GFS2_I(inode);
628e366d
AG
1221 loff_t pos = (loff_t)lblock << inode->i_blkbits;
1222 loff_t length = bh_map->b_size;
1223 struct metapath mp = { .mp_aheight = 1, };
1224 struct iomap iomap = { };
1225 int ret;
3974320c
BP
1226
1227 clear_buffer_mapped(bh_map);
1228 clear_buffer_new(bh_map);
1229 clear_buffer_boundary(bh_map);
1230 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
1231
628e366d
AG
1232 if (create) {
1233 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
1234 if (!ret && iomap.type == IOMAP_HOLE)
1235 ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
1236 release_metapath(&mp);
1237 } else {
1238 ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
1239 release_metapath(&mp);
3974320c 1240 }
628e366d
AG
1241 if (ret)
1242 goto out;
3974320c
BP
1243
1244 if (iomap.length > bh_map->b_size) {
1245 iomap.length = bh_map->b_size;
7ee66c03 1246 iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
5f8bd444 1247 }
3974320c
BP
1248 if (iomap.addr != IOMAP_NULL_ADDR)
1249 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
1250 bh_map->b_size = iomap.length;
7ee66c03 1251 if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
3974320c
BP
1252 set_buffer_boundary(bh_map);
1253 if (iomap.flags & IOMAP_F_NEW)
1254 set_buffer_new(bh_map);
1255
1256out:
1257 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
1258 return ret;
fd88de56
SW
1259}
1260
941e6d7d
SW
1261/*
1262 * Deprecated: do not use in new code
1263 */
fd88de56
SW
1264int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
1265{
23591256 1266 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 1267 int ret;
fd88de56
SW
1268 int create = *new;
1269
1270 BUG_ON(!extlen);
1271 BUG_ON(!dblock);
1272 BUG_ON(!new);
1273
47a9a527 1274 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 1275 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
1276 *extlen = bh.b_size >> inode->i_blkbits;
1277 *dblock = bh.b_blocknr;
1278 if (buffer_new(&bh))
1279 *new = 1;
1280 else
1281 *new = 0;
1282 return ret;
b3b94faa
DT
1283}
1284
ba7f7290 1285/**
bdba0d5e 1286 * gfs2_block_zero_range - Deal with zeroing out data
ba7f7290
SW
1287 *
1288 * This is partly borrowed from ext3.
1289 */
bdba0d5e
AG
1290static int gfs2_block_zero_range(struct inode *inode, loff_t from,
1291 unsigned int length)
ba7f7290 1292{
bdba0d5e 1293 struct address_space *mapping = inode->i_mapping;
ba7f7290 1294 struct gfs2_inode *ip = GFS2_I(inode);
09cbfeaf
KS
1295 unsigned long index = from >> PAGE_SHIFT;
1296 unsigned offset = from & (PAGE_SIZE-1);
bdba0d5e 1297 unsigned blocksize, iblock, pos;
ba7f7290
SW
1298 struct buffer_head *bh;
1299 struct page *page;
ba7f7290
SW
1300 int err;
1301
220cca2a 1302 page = find_or_create_page(mapping, index, GFP_NOFS);
ba7f7290
SW
1303 if (!page)
1304 return 0;
1305
1306 blocksize = inode->i_sb->s_blocksize;
09cbfeaf 1307 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
ba7f7290
SW
1308
1309 if (!page_has_buffers(page))
1310 create_empty_buffers(page, blocksize, 0);
1311
1312 /* Find the buffer that contains "offset" */
1313 bh = page_buffers(page);
1314 pos = blocksize;
1315 while (offset >= pos) {
1316 bh = bh->b_this_page;
1317 iblock++;
1318 pos += blocksize;
1319 }
1320
1321 err = 0;
1322
1323 if (!buffer_mapped(bh)) {
e9e1ef2b 1324 gfs2_block_map(inode, iblock, bh, 0);
ba7f7290
SW
1325 /* unmapped? It's a hole - nothing to do */
1326 if (!buffer_mapped(bh))
1327 goto unlock;
1328 }
1329
1330 /* Ok, it's mapped. Make sure it's up-to-date */
1331 if (PageUptodate(page))
1332 set_buffer_uptodate(bh);
1333
1334 if (!buffer_uptodate(bh)) {
1335 err = -EIO;
dfec8a14 1336 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
ba7f7290
SW
1337 wait_on_buffer(bh);
1338 /* Uhhuh. Read error. Complain and punt. */
1339 if (!buffer_uptodate(bh))
1340 goto unlock;
1875f2f3 1341 err = 0;
ba7f7290
SW
1342 }
1343
845802b1 1344 if (gfs2_is_jdata(ip))
350a9b0a 1345 gfs2_trans_add_data(ip->i_gl, bh);
845802b1
AG
1346 else
1347 gfs2_ordered_add_inode(ip);
ba7f7290 1348
eebd2aa3 1349 zero_user(page, offset, length);
40bc9a27 1350 mark_buffer_dirty(bh);
ba7f7290
SW
1351unlock:
1352 unlock_page(page);
09cbfeaf 1353 put_page(page);
ba7f7290
SW
1354 return err;
1355}
1356
c62baf65
FF
1357#define GFS2_JTRUNC_REVOKES 8192
1358
fa731fc4
SW
1359/**
1360 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
1361 * @inode: The inode being truncated
1362 * @oldsize: The original (larger) size
1363 * @newsize: The new smaller size
1364 *
1365 * With jdata files, we have to journal a revoke for each block which is
1366 * truncated. As a result, we need to split this into separate transactions
1367 * if the number of pages being truncated gets too large.
1368 */
1369
fa731fc4
SW
1370static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
1371{
1372 struct gfs2_sbd *sdp = GFS2_SB(inode);
1373 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1374 u64 chunk;
1375 int error;
1376
1377 while (oldsize != newsize) {
e7fdf004
AG
1378 struct gfs2_trans *tr;
1379 unsigned int offs;
1380
fa731fc4
SW
1381 chunk = oldsize - newsize;
1382 if (chunk > max_chunk)
1383 chunk = max_chunk;
e7fdf004
AG
1384
1385 offs = oldsize & ~PAGE_MASK;
1386 if (offs && chunk > PAGE_SIZE)
1387 chunk = offs + ((chunk - offs) & PAGE_MASK);
1388
7caef267 1389 truncate_pagecache(inode, oldsize - chunk);
fa731fc4 1390 oldsize -= chunk;
e7fdf004
AG
1391
1392 tr = current->journal_info;
1393 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1394 continue;
1395
fa731fc4
SW
1396 gfs2_trans_end(sdp);
1397 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1398 if (error)
1399 return error;
1400 }
1401
1402 return 0;
1403}
1404
8b5860a3 1405static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 1406{
ff8f33c8
SW
1407 struct gfs2_inode *ip = GFS2_I(inode);
1408 struct gfs2_sbd *sdp = GFS2_SB(inode);
80990f40 1409 struct buffer_head *dibh = NULL;
b3b94faa 1410 int journaled = gfs2_is_jdata(ip);
8b5860a3 1411 u64 oldsize = inode->i_size;
b3b94faa
DT
1412 int error;
1413
fa731fc4
SW
1414 if (journaled)
1415 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1416 else
1417 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1418 if (error)
1419 return error;
1420
1421 error = gfs2_meta_inode_buffer(ip, &dibh);
1422 if (error)
1423 goto out;
1424
350a9b0a 1425 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1426
b3b94faa 1427 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1428 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1429 } else {
bdba0d5e
AG
1430 unsigned int blocksize = i_blocksize(inode);
1431 unsigned int offs = newsize & (blocksize - 1);
1432 if (offs) {
1433 error = gfs2_block_zero_range(inode, newsize,
1434 blocksize - offs);
ff8f33c8 1435 if (error)
80990f40 1436 goto out;
b3b94faa 1437 }
ff8f33c8 1438 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1439 }
1440
ff8f33c8 1441 i_size_write(inode, newsize);
078cd827 1442 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1443 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1444
fa731fc4
SW
1445 if (journaled)
1446 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1447 else
7caef267 1448 truncate_pagecache(inode, newsize);
fa731fc4 1449
a91ea69f 1450out:
80990f40
AG
1451 brelse(dibh);
1452 if (current->journal_info)
1453 gfs2_trans_end(sdp);
b3b94faa
DT
1454 return error;
1455}
1456
628e366d
AG
1457int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1458 struct iomap *iomap)
1459{
1460 struct metapath mp = { .mp_aheight = 1, };
1461 int ret;
1462
1463 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1464 if (!ret && iomap->type == IOMAP_HOLE)
1465 ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
1466 release_metapath(&mp);
1467 return ret;
1468}
1469
d552a2b9
BP
1470/**
1471 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1472 * @ip: inode
1473 * @rg_gh: holder of resource group glock
5cf26b1e
AG
1474 * @bh: buffer head to sweep
1475 * @start: starting point in bh
1476 * @end: end point in bh
1477 * @meta: true if bh points to metadata (rather than data)
d552a2b9 1478 * @btotal: place to keep count of total blocks freed
d552a2b9
BP
1479 *
1480 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1481 * free, and free them all. However, we do it one rgrp at a time. If this
1482 * block has references to multiple rgrps, we break it into individual
1483 * transactions. This allows other processes to use the rgrps while we're
1484 * focused on a single one, for better concurrency / performance.
1485 * At every transaction boundary, we rewrite the inode into the journal.
1486 * That way the bitmaps are kept consistent with the inode and we can recover
1487 * if we're interrupted by power-outages.
1488 *
1489 * Returns: 0, or return code if an error occurred.
1490 * *btotal has the total number of blocks freed
1491 */
1492static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
5cf26b1e
AG
1493 struct buffer_head *bh, __be64 *start, __be64 *end,
1494 bool meta, u32 *btotal)
b3b94faa 1495{
9b8c81d1 1496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
1497 struct gfs2_rgrpd *rgd;
1498 struct gfs2_trans *tr;
5cf26b1e 1499 __be64 *p;
d552a2b9
BP
1500 int blks_outside_rgrp;
1501 u64 bn, bstart, isize_blks;
1502 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
d552a2b9
BP
1503 int ret = 0;
1504 bool buf_in_tr = false; /* buffer was added to transaction */
1505
d552a2b9 1506more_rgrps:
5cf26b1e
AG
1507 rgd = NULL;
1508 if (gfs2_holder_initialized(rd_gh)) {
1509 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1510 gfs2_assert_withdraw(sdp,
1511 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1512 }
d552a2b9
BP
1513 blks_outside_rgrp = 0;
1514 bstart = 0;
1515 blen = 0;
d552a2b9 1516
5cf26b1e 1517 for (p = start; p < end; p++) {
d552a2b9
BP
1518 if (!*p)
1519 continue;
1520 bn = be64_to_cpu(*p);
5cf26b1e
AG
1521
1522 if (rgd) {
1523 if (!rgrp_contains_block(rgd, bn)) {
1524 blks_outside_rgrp++;
1525 continue;
1526 }
d552a2b9 1527 } else {
90bcab99 1528 rgd = gfs2_blk2rgrpd(sdp, bn, true);
5cf26b1e
AG
1529 if (unlikely(!rgd)) {
1530 ret = -EIO;
1531 goto out;
1532 }
d552a2b9
BP
1533 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1534 0, rd_gh);
1535 if (ret)
1536 goto out;
1537
1538 /* Must be done with the rgrp glock held: */
1539 if (gfs2_rs_active(&ip->i_res) &&
1540 rgd == ip->i_res.rs_rbm.rgd)
1541 gfs2_rs_deltree(&ip->i_res);
1542 }
1543
d552a2b9
BP
1544 /* The size of our transactions will be unknown until we
1545 actually process all the metadata blocks that relate to
1546 the rgrp. So we estimate. We know it can't be more than
1547 the dinode's i_blocks and we don't want to exceed the
1548 journal flush threshold, sd_log_thresh2. */
1549 if (current->journal_info == NULL) {
1550 unsigned int jblocks_rqsted, revokes;
1551
1552 jblocks_rqsted = rgd->rd_length + RES_DINODE +
1553 RES_INDIRECT;
1554 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1555 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1556 jblocks_rqsted +=
1557 atomic_read(&sdp->sd_log_thresh2);
1558 else
1559 jblocks_rqsted += isize_blks;
1560 revokes = jblocks_rqsted;
1561 if (meta)
5cf26b1e 1562 revokes += end - start;
d552a2b9
BP
1563 else if (ip->i_depth)
1564 revokes += sdp->sd_inptrs;
1565 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1566 if (ret)
1567 goto out_unlock;
1568 down_write(&ip->i_rw_mutex);
1569 }
1570 /* check if we will exceed the transaction blocks requested */
1571 tr = current->journal_info;
1572 if (tr->tr_num_buf_new + RES_STATFS +
1573 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1574 /* We set blks_outside_rgrp to ensure the loop will
1575 be repeated for the same rgrp, but with a new
1576 transaction. */
1577 blks_outside_rgrp++;
1578 /* This next part is tricky. If the buffer was added
1579 to the transaction, we've already set some block
1580 pointers to 0, so we better follow through and free
1581 them, or we will introduce corruption (so break).
1582 This may be impossible, or at least rare, but I
1583 decided to cover the case regardless.
1584
1585 If the buffer was not added to the transaction
1586 (this call), doing so would exceed our transaction
1587 size, so we need to end the transaction and start a
1588 new one (so goto). */
1589
1590 if (buf_in_tr)
1591 break;
1592 goto out_unlock;
1593 }
1594
1595 gfs2_trans_add_meta(ip->i_gl, bh);
1596 buf_in_tr = true;
1597 *p = 0;
1598 if (bstart + blen == bn) {
1599 blen++;
1600 continue;
1601 }
1602 if (bstart) {
0ddeded4 1603 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1604 (*btotal) += blen;
1605 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1606 }
1607 bstart = bn;
1608 blen = 1;
1609 }
1610 if (bstart) {
0ddeded4 1611 __gfs2_free_blocks(ip, rgd, bstart, (u32)blen, meta);
d552a2b9
BP
1612 (*btotal) += blen;
1613 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1614 }
1615out_unlock:
1616 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1617 outside the rgrp we just processed,
1618 do it all over again. */
1619 if (current->journal_info) {
5cf26b1e
AG
1620 struct buffer_head *dibh;
1621
1622 ret = gfs2_meta_inode_buffer(ip, &dibh);
1623 if (ret)
1624 goto out;
d552a2b9
BP
1625
1626 /* Every transaction boundary, we rewrite the dinode
1627 to keep its di_blocks current in case of failure. */
1628 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1629 current_time(&ip->i_inode);
d552a2b9
BP
1630 gfs2_trans_add_meta(ip->i_gl, dibh);
1631 gfs2_dinode_out(ip, dibh->b_data);
5cf26b1e 1632 brelse(dibh);
d552a2b9
BP
1633 up_write(&ip->i_rw_mutex);
1634 gfs2_trans_end(sdp);
1635 }
1636 gfs2_glock_dq_uninit(rd_gh);
1637 cond_resched();
1638 goto more_rgrps;
1639 }
1640out:
1641 return ret;
1642}
1643
10d2cf94
AG
1644static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1645{
1646 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1647 return false;
1648 return true;
1649}
1650
d552a2b9
BP
1651/**
1652 * find_nonnull_ptr - find a non-null pointer given a metapath and height
d552a2b9
BP
1653 * @mp: starting metapath
1654 * @h: desired height to search
1655 *
10d2cf94 1656 * Assumes the metapath is valid (with buffers) out to height h.
d552a2b9
BP
1657 * Returns: true if a non-null pointer was found in the metapath buffer
1658 * false if all remaining pointers are NULL in the buffer
1659 */
1660static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
10d2cf94
AG
1661 unsigned int h,
1662 __u16 *end_list, unsigned int end_aligned)
d552a2b9 1663{
10d2cf94
AG
1664 struct buffer_head *bh = mp->mp_bh[h];
1665 __be64 *first, *ptr, *end;
1666
1667 first = metaptr1(h, mp);
1668 ptr = first + mp->mp_list[h];
1669 end = (__be64 *)(bh->b_data + bh->b_size);
1670 if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1671 bool keep_end = h < end_aligned;
1672 end = first + end_list[h] + keep_end;
1673 }
d552a2b9 1674
10d2cf94 1675 while (ptr < end) {
c4a9d189 1676 if (*ptr) { /* if we have a non-null pointer */
10d2cf94 1677 mp->mp_list[h] = ptr - first;
c4a9d189
BP
1678 h++;
1679 if (h < GFS2_MAX_META_HEIGHT)
10d2cf94 1680 mp->mp_list[h] = 0;
d552a2b9 1681 return true;
c4a9d189 1682 }
10d2cf94 1683 ptr++;
d552a2b9 1684 }
10d2cf94 1685 return false;
d552a2b9
BP
1686}
1687
/* States of the deallocation loop in punch_hole(). */
enum dealloc_states {
	/* Strip a metapath with all buffers read in */
	DEALLOC_MP_FULL = 0,
	/* lower the metapath strip height */
	DEALLOC_MP_LOWER = 1,
	/* Fill in the metapath to the given height. */
	DEALLOC_FILL_MP = 2,
	/* process complete */
	DEALLOC_DONE = 3,
};
b3b94faa 1694
5cf26b1e
AG
1695static inline void
1696metapointer_range(struct metapath *mp, int height,
1697 __u16 *start_list, unsigned int start_aligned,
10d2cf94 1698 __u16 *end_list, unsigned int end_aligned,
5cf26b1e
AG
1699 __be64 **start, __be64 **end)
1700{
1701 struct buffer_head *bh = mp->mp_bh[height];
1702 __be64 *first;
1703
1704 first = metaptr1(height, mp);
1705 *start = first;
1706 if (mp_eq_to_hgt(mp, start_list, height)) {
1707 bool keep_start = height < start_aligned;
1708 *start = first + start_list[height] + keep_start;
1709 }
1710 *end = (__be64 *)(bh->b_data + bh->b_size);
10d2cf94
AG
1711 if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1712 bool keep_end = height < end_aligned;
1713 *end = first + end_list[height] + keep_end;
1714 }
1715}
1716
1717static inline bool walk_done(struct gfs2_sbd *sdp,
1718 struct metapath *mp, int height,
1719 __u16 *end_list, unsigned int end_aligned)
1720{
1721 __u16 end;
1722
1723 if (end_list) {
1724 bool keep_end = height < end_aligned;
1725 if (!mp_eq_to_hgt(mp, end_list, height))
1726 return false;
1727 end = end_list[height] + keep_end;
1728 } else
1729 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1730 return mp->mp_list[height] >= end;
5cf26b1e
AG
1731}
1732
d552a2b9 1733/**
10d2cf94 1734 * punch_hole - deallocate blocks in a file
d552a2b9 1735 * @ip: inode to truncate
10d2cf94
AG
1736 * @offset: the start of the hole
1737 * @length: the size of the hole (or 0 for truncate)
1738 *
1739 * Punch a hole into a file or truncate a file at a given position. This
1740 * function operates in whole blocks (@offset and @length are rounded
1741 * accordingly); partially filled blocks must be cleared otherwise.
d552a2b9 1742 *
10d2cf94
AG
1743 * This function works from the bottom up, and from the right to the left. In
1744 * other words, it strips off the highest layer (data) before stripping any of
1745 * the metadata. Doing it this way is best in case the operation is interrupted
1746 * by power failure, etc. The dinode is rewritten in every transaction to
1747 * guarantee integrity.
d552a2b9 1748 */
10d2cf94 1749static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
d552a2b9
BP
1750{
1751 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
bb491ce6 1752 u64 maxsize = sdp->sd_heightsize[ip->i_height];
10d2cf94 1753 struct metapath mp = {};
d552a2b9
BP
1754 struct buffer_head *dibh, *bh;
1755 struct gfs2_holder rd_gh;
cb7f0903 1756 unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
10d2cf94
AG
1757 u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1758 __u16 start_list[GFS2_MAX_META_HEIGHT];
1759 __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
4e56a641 1760 unsigned int start_aligned, uninitialized_var(end_aligned);
d552a2b9
BP
1761 unsigned int strip_h = ip->i_height - 1;
1762 u32 btotal = 0;
1763 int ret, state;
1764 int mp_h; /* metapath buffers are read in to this height */
d552a2b9 1765 u64 prev_bnr = 0;
5cf26b1e 1766 __be64 *start, *end;
b3b94faa 1767
bb491ce6
AG
1768 if (offset >= maxsize) {
1769 /*
1770 * The starting point lies beyond the allocated meta-data;
1771 * there are no blocks do deallocate.
1772 */
1773 return 0;
1774 }
1775
10d2cf94
AG
1776 /*
1777 * The start position of the hole is defined by lblock, start_list, and
1778 * start_aligned. The end position of the hole is defined by lend,
1779 * end_list, and end_aligned.
1780 *
1781 * start_aligned and end_aligned define down to which height the start
1782 * and end positions are aligned to the metadata tree (i.e., the
1783 * position is a multiple of the metadata granularity at the height
1784 * above). This determines at which heights additional meta pointers
1785 * needs to be preserved for the remaining data.
1786 */
b3b94faa 1787
10d2cf94 1788 if (length) {
10d2cf94
AG
1789 u64 end_offset = offset + length;
1790 u64 lend;
1791
1792 /*
1793 * Clip the end at the maximum file size for the given height:
1794 * that's how far the metadata goes; files bigger than that
1795 * will have additional layers of indirection.
1796 */
1797 if (end_offset > maxsize)
1798 end_offset = maxsize;
1799 lend = end_offset >> bsize_shift;
1800
1801 if (lblock >= lend)
1802 return 0;
1803
1804 find_metapath(sdp, lend, &mp, ip->i_height);
1805 end_list = __end_list;
1806 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1807
1808 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1809 if (end_list[mp_h])
1810 break;
1811 }
1812 end_aligned = mp_h;
1813 }
1814
1815 find_metapath(sdp, lblock, &mp, ip->i_height);
cb7f0903
AG
1816 memcpy(start_list, mp.mp_list, sizeof(start_list));
1817
cb7f0903
AG
1818 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1819 if (start_list[mp_h])
1820 break;
1821 }
1822 start_aligned = mp_h;
d552a2b9
BP
1823
1824 ret = gfs2_meta_inode_buffer(ip, &dibh);
1825 if (ret)
1826 return ret;
b3b94faa 1827
d552a2b9
BP
1828 mp.mp_bh[0] = dibh;
1829 ret = lookup_metapath(ip, &mp);
e8b43fe0
AG
1830 if (ret)
1831 goto out_metapath;
c3ce5aa9
AG
1832
1833 /* issue read-ahead on metadata */
5cf26b1e
AG
1834 for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1835 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1836 end_list, end_aligned, &start, &end);
5cf26b1e
AG
1837 gfs2_metapath_ra(ip->i_gl, start, end);
1838 }
c3ce5aa9 1839
e8b43fe0 1840 if (mp.mp_aheight == ip->i_height)
d552a2b9
BP
1841 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1842 else
1843 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1844
d552a2b9
BP
1845 ret = gfs2_rindex_update(sdp);
1846 if (ret)
1847 goto out_metapath;
1848
1849 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1850 if (ret)
1851 goto out_metapath;
1852 gfs2_holder_mark_uninitialized(&rd_gh);
1853
1854 mp_h = strip_h;
1855
1856 while (state != DEALLOC_DONE) {
1857 switch (state) {
1858 /* Truncate a full metapath at the given strip height.
1859 * Note that strip_h == mp_h in order to be in this state. */
1860 case DEALLOC_MP_FULL:
d552a2b9
BP
1861 bh = mp.mp_bh[mp_h];
1862 gfs2_assert_withdraw(sdp, bh);
1863 if (gfs2_assert_withdraw(sdp,
1864 prev_bnr != bh->b_blocknr)) {
1865 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1866 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1867 sdp->sd_fsname,
1868 (unsigned long long)ip->i_no_addr,
1869 prev_bnr, ip->i_height, strip_h, mp_h);
1870 }
1871 prev_bnr = bh->b_blocknr;
cb7f0903 1872
5cf26b1e
AG
1873 if (gfs2_metatype_check(sdp, bh,
1874 (mp_h ? GFS2_METATYPE_IN :
1875 GFS2_METATYPE_DI))) {
1876 ret = -EIO;
1877 goto out;
1878 }
1879
10d2cf94
AG
1880 /*
1881 * Below, passing end_aligned as 0 gives us the
1882 * metapointer range excluding the end point: the end
1883 * point is the first metapath we must not deallocate!
1884 */
1885
5cf26b1e 1886 metapointer_range(&mp, mp_h, start_list, start_aligned,
10d2cf94 1887 end_list, 0 /* end_aligned */,
5cf26b1e
AG
1888 &start, &end);
1889 ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1890 start, end,
1891 mp_h != ip->i_height - 1,
1892 &btotal);
cb7f0903 1893
d552a2b9
BP
1894 /* If we hit an error or just swept dinode buffer,
1895 just exit. */
1896 if (ret || !mp_h) {
1897 state = DEALLOC_DONE;
1898 break;
1899 }
1900 state = DEALLOC_MP_LOWER;
1901 break;
1902
1903 /* lower the metapath strip height */
1904 case DEALLOC_MP_LOWER:
1905 /* We're done with the current buffer, so release it,
1906 unless it's the dinode buffer. Then back up to the
1907 previous pointer. */
1908 if (mp_h) {
1909 brelse(mp.mp_bh[mp_h]);
1910 mp.mp_bh[mp_h] = NULL;
1911 }
1912 /* If we can't get any lower in height, we've stripped
1913 off all we can. Next step is to back up and start
1914 stripping the previous level of metadata. */
1915 if (mp_h == 0) {
1916 strip_h--;
cb7f0903 1917 memcpy(mp.mp_list, start_list, sizeof(start_list));
d552a2b9
BP
1918 mp_h = strip_h;
1919 state = DEALLOC_FILL_MP;
1920 break;
1921 }
1922 mp.mp_list[mp_h] = 0;
1923 mp_h--; /* search one metadata height down */
d552a2b9 1924 mp.mp_list[mp_h]++;
10d2cf94
AG
1925 if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1926 break;
d552a2b9
BP
1927 /* Here we've found a part of the metapath that is not
1928 * allocated. We need to search at that height for the
1929 * next non-null pointer. */
10d2cf94 1930 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
d552a2b9
BP
1931 state = DEALLOC_FILL_MP;
1932 mp_h++;
1933 }
1934 /* No more non-null pointers at this height. Back up
1935 to the previous height and try again. */
1936 break; /* loop around in the same state */
1937
1938 /* Fill the metapath with buffers to the given height. */
1939 case DEALLOC_FILL_MP:
1940 /* Fill the buffers out to the current height. */
1941 ret = fillup_metapath(ip, &mp, mp_h);
c3ce5aa9 1942 if (ret < 0)
d552a2b9 1943 goto out;
c3ce5aa9 1944
e7445ced
AG
1945 /* On the first pass, issue read-ahead on metadata. */
1946 if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
1947 unsigned int height = mp.mp_aheight - 1;
1948
1949 /* No read-ahead for data blocks. */
1950 if (mp.mp_aheight - 1 == strip_h)
1951 height--;
1952
1953 for (; height >= mp.mp_aheight - ret; height--) {
1954 metapointer_range(&mp, height,
5cf26b1e 1955 start_list, start_aligned,
10d2cf94 1956 end_list, end_aligned,
5cf26b1e
AG
1957 &start, &end);
1958 gfs2_metapath_ra(ip->i_gl, start, end);
1959 }
c3ce5aa9 1960 }
d552a2b9
BP
1961
1962 /* If buffers found for the entire strip height */
e8b43fe0 1963 if (mp.mp_aheight - 1 == strip_h) {
d552a2b9
BP
1964 state = DEALLOC_MP_FULL;
1965 break;
1966 }
e8b43fe0
AG
1967 if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1968 mp_h = mp.mp_aheight - 1;
d552a2b9
BP
1969
1970 /* If we find a non-null block pointer, crawl a bit
1971 higher up in the metapath and try again, otherwise
1972 we need to look lower for a new starting point. */
10d2cf94 1973 if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
d552a2b9
BP
1974 mp_h++;
1975 else
1976 state = DEALLOC_MP_LOWER;
b3b94faa 1977 break;
d552a2b9 1978 }
b3b94faa
DT
1979 }
1980
d552a2b9
BP
1981 if (btotal) {
1982 if (current->journal_info == NULL) {
1983 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1984 RES_QUOTA, 0);
1985 if (ret)
1986 goto out;
1987 down_write(&ip->i_rw_mutex);
1988 }
1989 gfs2_statfs_change(sdp, 0, +btotal, 0);
1990 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1991 ip->i_inode.i_gid);
b32c8c76 1992 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1993 gfs2_trans_add_meta(ip->i_gl, dibh);
1994 gfs2_dinode_out(ip, dibh->b_data);
1995 up_write(&ip->i_rw_mutex);
1996 gfs2_trans_end(sdp);
1997 }
b3b94faa 1998
d552a2b9
BP
1999out:
2000 if (gfs2_holder_initialized(&rd_gh))
2001 gfs2_glock_dq_uninit(&rd_gh);
2002 if (current->journal_info) {
2003 up_write(&ip->i_rw_mutex);
2004 gfs2_trans_end(sdp);
2005 cond_resched();
2006 }
2007 gfs2_quota_unhold(ip);
2008out_metapath:
2009 release_metapath(&mp);
2010 return ret;
b3b94faa
DT
2011}
2012
2013static int trunc_end(struct gfs2_inode *ip)
2014{
feaa7bba 2015 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
2016 struct buffer_head *dibh;
2017 int error;
2018
2019 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2020 if (error)
2021 return error;
2022
2023 down_write(&ip->i_rw_mutex);
2024
2025 error = gfs2_meta_inode_buffer(ip, &dibh);
2026 if (error)
2027 goto out;
2028
a2e0f799 2029 if (!i_size_read(&ip->i_inode)) {
ecc30c79 2030 ip->i_height = 0;
ce276b06 2031 ip->i_goal = ip->i_no_addr;
b3b94faa 2032 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 2033 gfs2_ordered_del_inode(ip);
b3b94faa 2034 }
078cd827 2035 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 2036 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 2037
350a9b0a 2038 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 2039 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
2040 brelse(dibh);
2041
a91ea69f 2042out:
b3b94faa 2043 up_write(&ip->i_rw_mutex);
b3b94faa 2044 gfs2_trans_end(sdp);
b3b94faa
DT
2045 return error;
2046}
2047
2048/**
2049 * do_shrink - make a file smaller
ff8f33c8 2050 * @inode: the inode
ff8f33c8 2051 * @newsize: the size to make the file
b3b94faa 2052 *
ff8f33c8
SW
2053 * Called with an exclusive lock on @inode. The @size must
2054 * be equal to or smaller than the current inode size.
b3b94faa
DT
2055 *
2056 * Returns: errno
2057 */
2058
8b5860a3 2059static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 2060{
ff8f33c8 2061 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
2062 int error;
2063
8b5860a3 2064 error = trunc_start(inode, newsize);
b3b94faa
DT
2065 if (error < 0)
2066 return error;
ff8f33c8 2067 if (gfs2_is_stuffed(ip))
b3b94faa
DT
2068 return 0;
2069
10d2cf94 2070 error = punch_hole(ip, newsize, 0);
ff8f33c8 2071 if (error == 0)
b3b94faa
DT
2072 error = trunc_end(ip);
2073
2074 return error;
2075}
2076
ff8f33c8 2077void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 2078{
ff8f33c8
SW
2079 int ret;
2080
8b5860a3 2081 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
2082 WARN_ON(ret != 0);
2083}
2084
2085/**
2086 * do_grow - Touch and update inode size
2087 * @inode: The inode
2088 * @size: The new size
2089 *
2090 * This function updates the timestamps on the inode and
2091 * may also increase the size of the inode. This function
2092 * must not be called with @size any smaller than the current
2093 * inode size.
2094 *
2095 * Although it is not strictly required to unstuff files here,
2096 * earlier versions of GFS2 have a bug in the stuffed file reading
2097 * code which will result in a buffer overrun if the size is larger
2098 * than the max stuffed file size. In order to prevent this from
25985edc 2099 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
2100 * just update the inode size directly.
2101 *
2102 * Returns: 0 on success, or -ve on error
2103 */
2104
2105static int do_grow(struct inode *inode, u64 size)
2106{
2107 struct gfs2_inode *ip = GFS2_I(inode);
2108 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 2109 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
2110 struct buffer_head *dibh;
2111 int error;
2f7ee358 2112 int unstuff = 0;
a13b8c5f 2113
235628c5 2114 if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
b8fbf471 2115 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 2116 if (error)
5407e242 2117 return error;
ff8f33c8 2118
7b9cff46 2119 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
2120 if (error)
2121 goto do_grow_qunlock;
2f7ee358 2122 unstuff = 1;
ff8f33c8
SW
2123 }
2124
a01aedfe 2125 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
bc020561
BP
2126 (unstuff &&
2127 gfs2_is_jdata(ip) ? RES_JDATA : 0) +
a01aedfe
BP
2128 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
2129 0 : RES_QUOTA), 0);
a13b8c5f 2130 if (error)
ff8f33c8 2131 goto do_grow_release;
a13b8c5f 2132
2f7ee358 2133 if (unstuff) {
ff8f33c8
SW
2134 error = gfs2_unstuff_dinode(ip, NULL);
2135 if (error)
2136 goto do_end_trans;
2137 }
a13b8c5f
WC
2138
2139 error = gfs2_meta_inode_buffer(ip, &dibh);
2140 if (error)
ff8f33c8 2141 goto do_end_trans;
a13b8c5f 2142
ff8f33c8 2143 i_size_write(inode, size);
078cd827 2144 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 2145 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
2146 gfs2_dinode_out(ip, dibh->b_data);
2147 brelse(dibh);
2148
ff8f33c8 2149do_end_trans:
a13b8c5f 2150 gfs2_trans_end(sdp);
ff8f33c8 2151do_grow_release:
2f7ee358 2152 if (unstuff) {
ff8f33c8
SW
2153 gfs2_inplace_release(ip);
2154do_grow_qunlock:
2155 gfs2_quota_unlock(ip);
ff8f33c8 2156 }
a13b8c5f
WC
2157 return error;
2158}
2159
b3b94faa 2160/**
ff8f33c8
SW
2161 * gfs2_setattr_size - make a file a given size
2162 * @inode: the inode
2163 * @newsize: the size to make the file
b3b94faa 2164 *
ff8f33c8 2165 * The file size can grow, shrink, or stay the same size. This
3e7aafc3 2166 * is called holding i_rwsem and an exclusive glock on the inode
ff8f33c8 2167 * in question.
b3b94faa
DT
2168 *
2169 * Returns: errno
2170 */
2171
ff8f33c8 2172int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 2173{
af5c2697 2174 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 2175 int ret;
b3b94faa 2176
ff8f33c8 2177 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 2178
ff8f33c8
SW
2179 ret = inode_newsize_ok(inode, newsize);
2180 if (ret)
2181 return ret;
b3b94faa 2182
562c72aa
CH
2183 inode_dio_wait(inode);
2184
b54e9a0b 2185 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 2186 if (ret)
2b3dcf35 2187 goto out;
d2b47cfb 2188
8b5860a3 2189 if (newsize >= inode->i_size) {
2b3dcf35
BP
2190 ret = do_grow(inode, newsize);
2191 goto out;
2192 }
ff8f33c8 2193
8b5860a3 2194 ret = do_shrink(inode, newsize);
2b3dcf35 2195out:
a097dc7e 2196 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 2197 return ret;
b3b94faa
DT
2198}
2199
2200int gfs2_truncatei_resume(struct gfs2_inode *ip)
2201{
2202 int error;
10d2cf94 2203 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
2204 if (!error)
2205 error = trunc_end(ip);
2206 return error;
2207}
2208
2209int gfs2_file_dealloc(struct gfs2_inode *ip)
2210{
10d2cf94 2211 return punch_hole(ip, 0, 0);
b3b94faa
DT
2212}
2213
b50f227b
SW
2214/**
2215 * gfs2_free_journal_extents - Free cached journal bmap info
2216 * @jd: The journal
2217 *
2218 */
2219
2220void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
2221{
2222 struct gfs2_journal_extent *jext;
2223
2224 while(!list_empty(&jd->extent_list)) {
2225 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
2226 list_del(&jext->list);
2227 kfree(jext);
2228 }
2229}
2230
2231/**
2232 * gfs2_add_jextent - Add or merge a new extent to extent cache
2233 * @jd: The journal descriptor
2234 * @lblock: The logical block at start of new extent
c62baf65 2235 * @dblock: The physical block at start of new extent
b50f227b
SW
2236 * @blocks: Size of extent in fs blocks
2237 *
2238 * Returns: 0 on success or -ENOMEM
2239 */
2240
2241static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
2242{
2243 struct gfs2_journal_extent *jext;
2244
2245 if (!list_empty(&jd->extent_list)) {
2246 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
2247 if ((jext->dblock + jext->blocks) == dblock) {
2248 jext->blocks += blocks;
2249 return 0;
2250 }
2251 }
2252
2253 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
2254 if (jext == NULL)
2255 return -ENOMEM;
2256 jext->dblock = dblock;
2257 jext->lblock = lblock;
2258 jext->blocks = blocks;
2259 list_add_tail(&jext->list, &jd->extent_list);
2260 jd->nr_extents++;
2261 return 0;
2262}
2263
2264/**
2265 * gfs2_map_journal_extents - Cache journal bmap info
2266 * @sdp: The super block
2267 * @jd: The journal to map
2268 *
2269 * Create a reusable "extent" mapping from all logical
2270 * blocks to all physical blocks for the given journal. This will save
2271 * us time when writing journal blocks. Most journals will have only one
2272 * extent that maps all their logical blocks. That's because gfs2.mkfs
2273 * arranges the journal blocks sequentially to maximize performance.
2274 * So the extent would map the first block for the entire file length.
2275 * However, gfs2_jadd can happen while file activity is happening, so
2276 * those journals may not be sequential. Less likely is the case where
2277 * the users created their own journals by mounting the metafs and
2278 * laying it out. But it's still possible. These journals might have
2279 * several extents.
2280 *
2281 * Returns: 0 on success, or error on failure
2282 */
2283
2284int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
2285{
2286 u64 lblock = 0;
2287 u64 lblock_stop;
2288 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
2289 struct buffer_head bh;
2290 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2291 u64 size;
2292 int rc;
98583b3e 2293 ktime_t start, end;
b50f227b 2294
98583b3e 2295 start = ktime_get();
b50f227b
SW
2296 lblock_stop = i_size_read(jd->jd_inode) >> shift;
2297 size = (lblock_stop - lblock) << shift;
2298 jd->nr_extents = 0;
2299 WARN_ON(!list_empty(&jd->extent_list));
2300
2301 do {
2302 bh.b_state = 0;
2303 bh.b_blocknr = 0;
2304 bh.b_size = size;
2305 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
2306 if (rc || !buffer_mapped(&bh))
2307 goto fail;
2308 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
2309 if (rc)
2310 goto fail;
2311 size -= bh.b_size;
2312 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2313 } while(size > 0);
2314
98583b3e
AD
2315 end = ktime_get();
2316 fs_info(sdp, "journal %d mapped with %u extents in %lldms\n", jd->jd_jid,
2317 jd->nr_extents, ktime_ms_delta(end, start));
b50f227b
SW
2318 return 0;
2319
2320fail:
2321 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
2322 rc, jd->jd_jid,
2323 (unsigned long long)(i_size_read(jd->jd_inode) - size),
2324 jd->nr_extents);
2325 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
2326 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
2327 bh.b_state, (unsigned long long)bh.b_size);
2328 gfs2_free_journal_extents(jd);
2329 return rc;
2330}
2331
b3b94faa
DT
2332/**
2333 * gfs2_write_alloc_required - figure out if a write will require an allocation
2334 * @ip: the file being written to
2335 * @offset: the offset to write to
2336 * @len: the number of bytes being written
b3b94faa 2337 *
461cb419 2338 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2339 */
2340
cd915493 2341int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2342 unsigned int len)
b3b94faa 2343{
feaa7bba 2344 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2345 struct buffer_head bh;
2346 unsigned int shift;
2347 u64 lblock, lblock_stop, size;
7ed122e4 2348 u64 end_of_file;
b3b94faa 2349
b3b94faa
DT
2350 if (!len)
2351 return 0;
2352
2353 if (gfs2_is_stuffed(ip)) {
235628c5 2354 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2355 return 1;
b3b94faa
DT
2356 return 0;
2357 }
2358
941e6d7d 2359 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2360 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2361 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2362 lblock = offset >> shift;
2363 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
77612578 2364 if (lblock_stop > end_of_file && ip != GFS2_I(sdp->sd_rindex))
461cb419 2365 return 1;
b3b94faa 2366
941e6d7d
SW
2367 size = (lblock_stop - lblock) << shift;
2368 do {
2369 bh.b_state = 0;
2370 bh.b_size = size;
2371 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2372 if (!buffer_mapped(&bh))
461cb419 2373 return 1;
941e6d7d
SW
2374 size -= bh.b_size;
2375 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2376 } while(size > 0);
b3b94faa
DT
2377
2378 return 0;
2379}
2380
4e56a641
AG
2381static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2382{
2383 struct gfs2_inode *ip = GFS2_I(inode);
2384 struct buffer_head *dibh;
2385 int error;
2386
2387 if (offset >= inode->i_size)
2388 return 0;
2389 if (offset + length > inode->i_size)
2390 length = inode->i_size - offset;
2391
2392 error = gfs2_meta_inode_buffer(ip, &dibh);
2393 if (error)
2394 return error;
2395 gfs2_trans_add_meta(ip->i_gl, dibh);
2396 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2397 length);
2398 brelse(dibh);
2399 return 0;
2400}
2401
2402static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2403 loff_t length)
2404{
2405 struct gfs2_sbd *sdp = GFS2_SB(inode);
2406 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2407 int error;
2408
2409 while (length) {
2410 struct gfs2_trans *tr;
2411 loff_t chunk;
2412 unsigned int offs;
2413
2414 chunk = length;
2415 if (chunk > max_chunk)
2416 chunk = max_chunk;
2417
2418 offs = offset & ~PAGE_MASK;
2419 if (offs && chunk > PAGE_SIZE)
2420 chunk = offs + ((chunk - offs) & PAGE_MASK);
2421
2422 truncate_pagecache_range(inode, offset, chunk);
2423 offset += chunk;
2424 length -= chunk;
2425
2426 tr = current->journal_info;
2427 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2428 continue;
2429
2430 gfs2_trans_end(sdp);
2431 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2432 if (error)
2433 return error;
2434 }
2435 return 0;
2436}
2437
2438int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2439{
2440 struct inode *inode = file_inode(file);
2441 struct gfs2_inode *ip = GFS2_I(inode);
2442 struct gfs2_sbd *sdp = GFS2_SB(inode);
2443 int error;
2444
2445 if (gfs2_is_jdata(ip))
2446 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2447 GFS2_JTRUNC_REVOKES);
2448 else
2449 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2450 if (error)
2451 return error;
2452
2453 if (gfs2_is_stuffed(ip)) {
2454 error = stuffed_zero_range(inode, offset, length);
2455 if (error)
2456 goto out;
2457 } else {
00251a16 2458 unsigned int start_off, end_len, blocksize;
4e56a641
AG
2459
2460 blocksize = i_blocksize(inode);
2461 start_off = offset & (blocksize - 1);
00251a16 2462 end_len = (offset + length) & (blocksize - 1);
4e56a641
AG
2463 if (start_off) {
2464 unsigned int len = length;
2465 if (length > blocksize - start_off)
2466 len = blocksize - start_off;
2467 error = gfs2_block_zero_range(inode, offset, len);
2468 if (error)
2469 goto out;
2470 if (start_off + length < blocksize)
00251a16 2471 end_len = 0;
4e56a641 2472 }
00251a16 2473 if (end_len) {
4e56a641 2474 error = gfs2_block_zero_range(inode,
00251a16 2475 offset + length - end_len, end_len);
4e56a641
AG
2476 if (error)
2477 goto out;
2478 }
2479 }
2480
2481 if (gfs2_is_jdata(ip)) {
2482 BUG_ON(!current->journal_info);
2483 gfs2_journaled_truncate_range(inode, offset, length);
2484 } else
2485 truncate_pagecache_range(inode, offset, offset + length - 1);
2486
2487 file_update_time(file);
2488 mark_inode_dirty(inode);
2489
2490 if (current->journal_info)
2491 gfs2_trans_end(sdp);
2492
2493 if (!gfs2_is_stuffed(ip))
2494 error = punch_hole(ip, offset, length);
2495
2496out:
2497 if (current->journal_info)
2498 gfs2_trans_end(sdp);
2499 return error;
2500}