gfs2: Remove minor gfs2_journaled_truncate inefficiencies
[linux-block.git] / fs / gfs2 / bmap.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
b3b94faa
DT
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
64dd153c 13#include <linux/blkdev.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
71b86f56 15#include <linux/crc32.h>
3974320c 16#include <linux/iomap.h>
b3b94faa
DT
17
18#include "gfs2.h"
5c676f6d 19#include "incore.h"
b3b94faa
DT
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
b3b94faa 23#include "meta_io.h"
b3b94faa
DT
24#include "quota.h"
25#include "rgrp.h"
45138990 26#include "log.h"
4c16c36a 27#include "super.h"
b3b94faa 28#include "trans.h"
18ec7d5c 29#include "dir.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa
DT
32
33/* This doesn't need to be that large as max 64 bit pointers in a 4k
34 * block is 512, so __u16 is fine for that. It saves stack space to
35 * keep it small.
36 */
37struct metapath {
dbac6710 38 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
b3b94faa 39 __u16 mp_list[GFS2_MAX_META_HEIGHT];
5f8bd444
BP
40 int mp_fheight; /* find_metapath height */
41 int mp_aheight; /* actual height (lookup height) */
b3b94faa
DT
42};
43
f25ef0c1
SW
44/**
45 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
46 * @ip: the inode
47 * @dibh: the dinode buffer
48 * @block: the block number that was allocated
ff8f33c8 49 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
50 *
51 * Returns: errno
52 */
53
54static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
cd915493 55 u64 block, struct page *page)
f25ef0c1 56{
f25ef0c1
SW
57 struct inode *inode = &ip->i_inode;
58 struct buffer_head *bh;
59 int release = 0;
60
61 if (!page || page->index) {
220cca2a 62 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
f25ef0c1
SW
63 if (!page)
64 return -ENOMEM;
65 release = 1;
66 }
67
68 if (!PageUptodate(page)) {
69 void *kaddr = kmap(page);
602c89d2
SW
70 u64 dsize = i_size_read(inode);
71
72 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
73 dsize = dibh->b_size - sizeof(struct gfs2_dinode);
f25ef0c1 74
602c89d2 75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
09cbfeaf 76 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
f25ef0c1
SW
77 kunmap(page);
78
79 SetPageUptodate(page);
80 }
81
82 if (!page_has_buffers(page))
47a9a527
FF
83 create_empty_buffers(page, BIT(inode->i_blkbits),
84 BIT(BH_Uptodate));
f25ef0c1
SW
85
86 bh = page_buffers(page);
87
88 if (!buffer_mapped(bh))
89 map_bh(bh, inode->i_sb, block);
90
91 set_buffer_uptodate(bh);
eaf96527
SW
92 if (!gfs2_is_jdata(ip))
93 mark_buffer_dirty(bh);
bf36a713 94 if (!gfs2_is_writeback(ip))
350a9b0a 95 gfs2_trans_add_data(ip->i_gl, bh);
f25ef0c1
SW
96
97 if (release) {
98 unlock_page(page);
09cbfeaf 99 put_page(page);
f25ef0c1
SW
100 }
101
102 return 0;
103}
104
b3b94faa
DT
105/**
106 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
107 * @ip: The GFS2 inode to unstuff
ff8f33c8 108 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
109 *
110 * This routine unstuffs a dinode and returns it to a "normal" state such
111 * that the height can be grown in the traditional way.
112 *
113 * Returns: errno
114 */
115
f25ef0c1 116int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
b3b94faa
DT
117{
118 struct buffer_head *bh, *dibh;
48516ced 119 struct gfs2_dinode *di;
cd915493 120 u64 block = 0;
18ec7d5c 121 int isdir = gfs2_is_dir(ip);
b3b94faa
DT
122 int error;
123
124 down_write(&ip->i_rw_mutex);
125
126 error = gfs2_meta_inode_buffer(ip, &dibh);
127 if (error)
128 goto out;
907b9bce 129
a2e0f799 130 if (i_size_read(&ip->i_inode)) {
b3b94faa
DT
131 /* Get a free block, fill it with the stuffed data,
132 and write it out to disk */
133
b45e41d7 134 unsigned int n = 1;
6e87ed0f 135 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
09010978
SW
136 if (error)
137 goto out_brelse;
18ec7d5c 138 if (isdir) {
5731be53 139 gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
61e085a8 140 error = gfs2_dir_get_new_buffer(ip, block, &bh);
b3b94faa
DT
141 if (error)
142 goto out_brelse;
48516ced 143 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
b3b94faa
DT
144 dibh, sizeof(struct gfs2_dinode));
145 brelse(bh);
146 } else {
f25ef0c1 147 error = gfs2_unstuffer_page(ip, dibh, block, page);
b3b94faa
DT
148 if (error)
149 goto out_brelse;
150 }
151 }
152
153 /* Set up the pointer to the new block */
154
350a9b0a 155 gfs2_trans_add_meta(ip->i_gl, dibh);
48516ced 156 di = (struct gfs2_dinode *)dibh->b_data;
b3b94faa
DT
157 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
158
a2e0f799 159 if (i_size_read(&ip->i_inode)) {
48516ced 160 *(__be64 *)(di + 1) = cpu_to_be64(block);
77658aad
SW
161 gfs2_add_inode_blocks(&ip->i_inode, 1);
162 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
b3b94faa
DT
163 }
164
ecc30c79 165 ip->i_height = 1;
48516ced 166 di->di_height = cpu_to_be16(1);
b3b94faa 167
a91ea69f 168out_brelse:
b3b94faa 169 brelse(dibh);
a91ea69f 170out:
b3b94faa 171 up_write(&ip->i_rw_mutex);
b3b94faa
DT
172 return error;
173}
174
b3b94faa
DT
175
176/**
177 * find_metapath - Find path through the metadata tree
9b8c81d1 178 * @sdp: The superblock
b3b94faa
DT
179 * @mp: The metapath to return the result in
180 * @block: The disk block to look up
9b8c81d1 181 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
182 *
183 * This routine returns a struct metapath structure that defines a path
184 * through the metadata of inode "ip" to get to block "block".
185 *
186 * Example:
187 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
188 * filesystem with a blocksize of 4096.
189 *
190 * find_metapath() would return a struct metapath structure set to:
191 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
192 * and mp_list[2] = 165.
193 *
194 * That means that in order to get to the block containing the byte at
195 * offset 101342453, we would load the indirect block pointed to by pointer
196 * 0 in the dinode. We would then load the indirect block pointed to by
197 * pointer 48 in that indirect block. We would then load the data block
198 * pointed to by pointer 165 in that indirect block.
199 *
200 * ----------------------------------------
201 * | Dinode | |
202 * | | 4|
203 * | |0 1 2 3 4 5 9|
204 * | | 6|
205 * ----------------------------------------
206 * |
207 * |
208 * V
209 * ----------------------------------------
210 * | Indirect Block |
211 * | 5|
212 * | 4 4 4 4 4 5 5 1|
213 * |0 5 6 7 8 9 0 1 2|
214 * ----------------------------------------
215 * |
216 * |
217 * V
218 * ----------------------------------------
219 * | Indirect Block |
220 * | 1 1 1 1 1 5|
221 * | 6 6 6 6 6 1|
222 * |0 3 4 5 6 7 2|
223 * ----------------------------------------
224 * |
225 * |
226 * V
227 * ----------------------------------------
228 * | Data block containing offset |
229 * | 101342453 |
230 * | |
231 * | |
232 * ----------------------------------------
233 *
234 */
235
9b8c81d1
SW
236static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
237 struct metapath *mp, unsigned int height)
b3b94faa 238{
b3b94faa
DT
239 unsigned int i;
240
5f8bd444 241 mp->mp_fheight = height;
9b8c81d1 242 for (i = height; i--;)
7eabb77e 243 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
244}
245
5af4e7a0 246static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 247{
5af4e7a0
BM
248 if (mp->mp_list[0] == 0)
249 return 2;
250 return 1;
9b8c81d1
SW
251}
252
d552a2b9 253/**
20cdc193 254 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
255 * @height: The metadata height (0 = dinode)
256 * @mp: The metapath
257 */
258static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
259{
260 struct buffer_head *bh = mp->mp_bh[height];
261 if (height == 0)
262 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
263 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
264}
265
b3b94faa
DT
266/**
267 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
268 * @height: The metadata height (0 = dinode)
269 * @mp: The metapath
270 *
271 * Return a pointer to the block number of the next height of the metadata
272 * tree given a buffer containing the pointer to the current height of the
273 * metadata tree.
274 */
275
9b8c81d1 276static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 277{
d552a2b9
BP
278 __be64 *p = metaptr1(height, mp);
279 return p + mp->mp_list[height];
b3b94faa
DT
280}
281
b99b98dc
SW
282static void gfs2_metapath_ra(struct gfs2_glock *gl,
283 const struct buffer_head *bh, const __be64 *pos)
284{
285 struct buffer_head *rabh;
286 const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
287 const __be64 *t;
288
289 for (t = pos; t < endp; t++) {
290 if (!*t)
291 continue;
292
293 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
294 if (trylock_buffer(rabh)) {
295 if (!buffer_uptodate(rabh)) {
296 rabh->b_end_io = end_buffer_read_sync;
e477b24b
CL
297 submit_bh(REQ_OP_READ,
298 REQ_RAHEAD | REQ_META | REQ_PRIO,
299 rabh);
b99b98dc
SW
300 continue;
301 }
302 unlock_buffer(rabh);
303 }
304 brelse(rabh);
305 }
306}
307
d552a2b9
BP
308/**
309 * lookup_mp_height - helper function for lookup_metapath
310 * @ip: the inode
311 * @mp: the metapath
312 * @h: the height which needs looking up
313 */
314static int lookup_mp_height(struct gfs2_inode *ip, struct metapath *mp, int h)
315{
316 __be64 *ptr = metapointer(h, mp);
317 u64 dblock = be64_to_cpu(*ptr);
318
319 if (!dblock)
320 return h + 1;
321
322 return gfs2_meta_indirect_buffer(ip, h + 1, dblock, &mp->mp_bh[h + 1]);
323}
324
b3b94faa 325/**
9b8c81d1
SW
326 * lookup_metapath - Walk the metadata tree to a specific point
327 * @ip: The inode
b3b94faa 328 * @mp: The metapath
b3b94faa 329 *
9b8c81d1
SW
330 * Assumes that the inode's buffer has already been looked up and
331 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
332 * by find_metapath().
333 *
334 * If this function encounters part of the tree which has not been
335 * allocated, it returns the current height of the tree at the point
336 * at which it found the unallocated block. Blocks which are found are
337 * added to the mp->mp_bh[] list.
b3b94faa 338 *
9b8c81d1 339 * Returns: error or height of metadata tree
b3b94faa
DT
340 */
341
9b8c81d1 342static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
11707ea0 343{
11707ea0
SW
344 unsigned int end_of_metadata = ip->i_height - 1;
345 unsigned int x;
e23159d2 346 int ret;
11707ea0
SW
347
348 for (x = 0; x < end_of_metadata; x++) {
d552a2b9 349 ret = lookup_mp_height(ip, mp, x);
11707ea0 350 if (ret)
5f8bd444 351 goto out;
11707ea0
SW
352 }
353
5f8bd444
BP
354 ret = ip->i_height;
355out:
356 mp->mp_aheight = ret;
357 return ret;
dbac6710
SW
358}
359
d552a2b9
BP
360/**
361 * fillup_metapath - fill up buffers for the metadata path to a specific height
362 * @ip: The inode
363 * @mp: The metapath
364 * @h: The height to which it should be mapped
365 *
366 * Similar to lookup_metapath, but does lookups for a range of heights
367 *
368 * Returns: error or height of metadata tree
369 */
370
371static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
372{
373 unsigned int start_h = h - 1;
374 int ret;
375
376 if (h) {
377 /* find the first buffer we need to look up. */
378 while (start_h > 0 && mp->mp_bh[start_h] == NULL)
379 start_h--;
380 for (; start_h < h; start_h++) {
381 ret = lookup_mp_height(ip, mp, start_h);
382 if (ret)
383 return ret;
384 }
385 }
386 return ip->i_height;
387}
388
9b8c81d1 389static inline void release_metapath(struct metapath *mp)
dbac6710
SW
390{
391 int i;
392
9b8c81d1
SW
393 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
394 if (mp->mp_bh[i] == NULL)
395 break;
396 brelse(mp->mp_bh[i]);
397 }
11707ea0
SW
398}
399
30cbf189
SW
400/**
401 * gfs2_extent_length - Returns length of an extent of blocks
402 * @start: Start of the buffer
403 * @len: Length of the buffer in bytes
404 * @ptr: Current position in the buffer
405 * @limit: Max extent length to return (0 = unlimited)
406 * @eob: Set to 1 if we hit "end of block"
407 *
408 * If the first block is zero (unallocated) it will return the number of
409 * unallocated blocks in the extent, otherwise it will return the number
410 * of contiguous blocks in the extent.
411 *
412 * Returns: The length of the extent (minimum of one block)
413 */
414
b650738c 415static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
30cbf189
SW
416{
417 const __be64 *end = (start + len);
418 const __be64 *first = ptr;
419 u64 d = be64_to_cpu(*ptr);
420
421 *eob = 0;
422 do {
423 ptr++;
424 if (ptr >= end)
425 break;
426 if (limit && --limit == 0)
427 break;
428 if (d)
429 d++;
430 } while(be64_to_cpu(*ptr) == d);
431 if (ptr >= end)
432 *eob = 1;
433 return (ptr - first);
434}
435
9b8c81d1 436static inline void bmap_lock(struct gfs2_inode *ip, int create)
4cf1ed81 437{
4cf1ed81
SW
438 if (create)
439 down_write(&ip->i_rw_mutex);
440 else
441 down_read(&ip->i_rw_mutex);
442}
443
9b8c81d1 444static inline void bmap_unlock(struct gfs2_inode *ip, int create)
4cf1ed81 445{
4cf1ed81
SW
446 if (create)
447 up_write(&ip->i_rw_mutex);
448 else
449 up_read(&ip->i_rw_mutex);
450}
451
9b8c81d1
SW
452static inline __be64 *gfs2_indirect_init(struct metapath *mp,
453 struct gfs2_glock *gl, unsigned int i,
454 unsigned offset, u64 bn)
455{
456 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
457 ((i > 1) ? sizeof(struct gfs2_meta_header) :
458 sizeof(struct gfs2_dinode)));
459 BUG_ON(i < 1);
460 BUG_ON(mp->mp_bh[i] != NULL);
461 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
350a9b0a 462 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
9b8c81d1
SW
463 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
464 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
465 ptr += offset;
466 *ptr = cpu_to_be64(bn);
467 return ptr;
468}
469
/* States of the block allocation state machine in gfs2_iomap_alloc() */
enum alloc_state {
	ALLOC_DATA = 0,		/* tree complete, adding data blocks */
	ALLOC_GROW_DEPTH = 1,	/* branching from existing tree */
	ALLOC_GROW_HEIGHT = 2,	/* growing height of tree */
	/* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
};
476
d552a2b9
BP
477static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt)
478{
479 if (hgt)
480 return sdp->sd_inptrs;
481 return sdp->sd_diptrs;
482}
483
9b8c81d1
SW
484/**
485 * gfs2_bmap_alloc - Build a metadata tree of the requested height
486 * @inode: The GFS2 inode
487 * @lblock: The logical starting block of the extent
488 * @bh_map: This is used to return the mapping details
5f8bd444
BP
489 * @zero_new: True if newly allocated blocks should be zeroed
490 * @mp: The metapath, with proper height information calculated
9b8c81d1 491 * @maxlen: The max number of data blocks to alloc
5f8bd444
BP
492 * @dblock: Pointer to return the resulting new block
493 * @dblks: Pointer to return the number of blocks allocated
9b8c81d1
SW
494 *
495 * In this routine we may have to alloc:
496 * i) Indirect blocks to grow the metadata tree height
497 * ii) Indirect blocks to fill in lower part of the metadata tree
498 * iii) Data blocks
499 *
500 * The function is in two parts. The first part works out the total
501 * number of blocks which we need. The second part does the actual
502 * allocation asking for an extent at a time (if enough contiguous free
503 * blocks are available, there will only be one request per bmap call)
504 * and uses the state machine to initialise the blocks in order.
505 *
506 * Returns: errno on error
507 */
508
3974320c
BP
509static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
510 unsigned flags, struct metapath *mp)
9b8c81d1
SW
511{
512 struct gfs2_inode *ip = GFS2_I(inode);
513 struct gfs2_sbd *sdp = GFS2_SB(inode);
64dd153c 514 struct super_block *sb = sdp->sd_vfs;
9b8c81d1 515 struct buffer_head *dibh = mp->mp_bh[0];
5f8bd444 516 u64 bn;
5af4e7a0 517 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
3974320c 518 unsigned dblks = 0;
9b8c81d1 519 unsigned ptrs_per_blk;
5f8bd444 520 const unsigned end_of_metadata = mp->mp_fheight - 1;
64dd153c 521 int ret;
9b8c81d1
SW
522 enum alloc_state state;
523 __be64 *ptr;
524 __be64 zero_bn = 0;
3974320c 525 size_t maxlen = iomap->length >> inode->i_blkbits;
9b8c81d1 526
5f8bd444 527 BUG_ON(mp->mp_aheight < 1);
9b8c81d1
SW
528 BUG_ON(dibh == NULL);
529
350a9b0a 530 gfs2_trans_add_meta(ip->i_gl, dibh);
9b8c81d1 531
5f8bd444 532 if (mp->mp_fheight == mp->mp_aheight) {
9b8c81d1 533 struct buffer_head *bh;
3974320c
BP
534 int eob;
535
9b8c81d1
SW
536 /* Bottom indirect block exists, find unalloced extent size */
537 ptr = metapointer(end_of_metadata, mp);
538 bh = mp->mp_bh[end_of_metadata];
3974320c
BP
539 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
540 maxlen, &eob);
541 BUG_ON(dblks < 1);
9b8c81d1
SW
542 state = ALLOC_DATA;
543 } else {
544 /* Need to allocate indirect blocks */
5f8bd444
BP
545 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
546 sdp->sd_diptrs;
3974320c
BP
547 dblks = min(maxlen, (size_t)(ptrs_per_blk -
548 mp->mp_list[end_of_metadata]));
5f8bd444 549 if (mp->mp_fheight == ip->i_height) {
9b8c81d1 550 /* Writing into existing tree, extend tree down */
5f8bd444 551 iblks = mp->mp_fheight - mp->mp_aheight;
9b8c81d1
SW
552 state = ALLOC_GROW_DEPTH;
553 } else {
554 /* Building up tree height */
555 state = ALLOC_GROW_HEIGHT;
5f8bd444 556 iblks = mp->mp_fheight - ip->i_height;
5af4e7a0 557 branch_start = metapath_branch_start(mp);
5f8bd444 558 iblks += (mp->mp_fheight - branch_start);
9b8c81d1
SW
559 }
560 }
561
562 /* start of the second part of the function (state machine) */
563
3974320c 564 blks = dblks + iblks;
5f8bd444 565 i = mp->mp_aheight;
9b8c81d1 566 do {
09010978 567 int error;
9b8c81d1 568 n = blks - alloced;
6e87ed0f 569 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
09010978
SW
570 if (error)
571 return error;
9b8c81d1
SW
572 alloced += n;
573 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
574 gfs2_trans_add_unrevoke(sdp, bn, n);
575 switch (state) {
576 /* Growing height of tree */
577 case ALLOC_GROW_HEIGHT:
578 if (i == 1) {
579 ptr = (__be64 *)(dibh->b_data +
580 sizeof(struct gfs2_dinode));
581 zero_bn = *ptr;
582 }
5f8bd444
BP
583 for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
584 i++, n--)
9b8c81d1 585 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
5f8bd444 586 if (i - 1 == mp->mp_fheight - ip->i_height) {
9b8c81d1
SW
587 i--;
588 gfs2_buffer_copy_tail(mp->mp_bh[i],
589 sizeof(struct gfs2_meta_header),
590 dibh, sizeof(struct gfs2_dinode));
591 gfs2_buffer_clear_tail(dibh,
592 sizeof(struct gfs2_dinode) +
593 sizeof(__be64));
594 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
595 sizeof(struct gfs2_meta_header));
596 *ptr = zero_bn;
597 state = ALLOC_GROW_DEPTH;
5f8bd444 598 for(i = branch_start; i < mp->mp_fheight; i++) {
9b8c81d1
SW
599 if (mp->mp_bh[i] == NULL)
600 break;
601 brelse(mp->mp_bh[i]);
602 mp->mp_bh[i] = NULL;
603 }
5af4e7a0 604 i = branch_start;
9b8c81d1
SW
605 }
606 if (n == 0)
607 break;
608 /* Branching from existing tree */
609 case ALLOC_GROW_DEPTH:
5f8bd444 610 if (i > 1 && i < mp->mp_fheight)
350a9b0a 611 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
5f8bd444 612 for (; i < mp->mp_fheight && n > 0; i++, n--)
9b8c81d1
SW
613 gfs2_indirect_init(mp, ip->i_gl, i,
614 mp->mp_list[i-1], bn++);
5f8bd444 615 if (i == mp->mp_fheight)
9b8c81d1
SW
616 state = ALLOC_DATA;
617 if (n == 0)
618 break;
619 /* Tree complete, adding data blocks */
620 case ALLOC_DATA:
3974320c 621 BUG_ON(n > dblks);
9b8c81d1 622 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
350a9b0a 623 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
3974320c 624 dblks = n;
9b8c81d1 625 ptr = metapointer(end_of_metadata, mp);
3974320c
BP
626 iomap->addr = bn << inode->i_blkbits;
627 iomap->flags |= IOMAP_F_NEW;
9b8c81d1
SW
628 while (n-- > 0)
629 *ptr++ = cpu_to_be64(bn++);
3974320c
BP
630 if (flags & IOMAP_ZERO) {
631 ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
632 dblks, GFP_NOFS);
64dd153c
BM
633 if (ret) {
634 fs_err(sdp,
635 "Failed to zero data buffers\n");
3974320c 636 flags &= ~IOMAP_ZERO;
64dd153c
BM
637 }
638 }
9b8c81d1
SW
639 break;
640 }
3974320c 641 } while (iomap->addr == IOMAP_NULL_ADDR);
9b8c81d1 642
3974320c 643 iomap->length = (u64)dblks << inode->i_blkbits;
5f8bd444 644 ip->i_height = mp->mp_fheight;
9b8c81d1
SW
645 gfs2_add_inode_blocks(&ip->i_inode, alloced);
646 gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
9b8c81d1
SW
647 return 0;
648}
649
b3b94faa 650/**
3974320c 651 * hole_size - figure out the size of a hole
fd88de56 652 * @inode: The inode
3974320c
BP
653 * @lblock: The logical starting block number
654 * @mp: The metapath
b3b94faa 655 *
3974320c 656 * Returns: The hole size in bytes
b3b94faa 657 *
b3b94faa 658 */
3974320c
BP
659static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
660{
661 struct gfs2_inode *ip = GFS2_I(inode);
662 struct gfs2_sbd *sdp = GFS2_SB(inode);
663 struct metapath mp_eof;
664 u64 factor = 1;
665 int hgt;
666 u64 holesz = 0;
667 const __be64 *first, *end, *ptr;
668 const struct buffer_head *bh;
669 u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
670 int zeroptrs;
671 bool done = false;
672
673 /* Get another metapath, to the very last byte */
674 find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
675 for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
676 bh = mp->mp_bh[hgt];
677 if (bh) {
678 zeroptrs = 0;
679 first = metapointer(hgt, mp);
680 end = (const __be64 *)(bh->b_data + bh->b_size);
681
682 for (ptr = first; ptr < end; ptr++) {
683 if (*ptr) {
684 done = true;
685 break;
686 } else {
687 zeroptrs++;
688 }
689 }
690 } else {
691 zeroptrs = sdp->sd_inptrs;
692 }
693 if (factor * zeroptrs >= lblock_stop - lblock + 1) {
694 holesz = lblock_stop - lblock + 1;
695 break;
696 }
697 holesz += factor * zeroptrs;
b3b94faa 698
3974320c
BP
699 factor *= sdp->sd_inptrs;
700 if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
701 (mp->mp_list[hgt - 1])++;
702 }
703 return holesz << inode->i_blkbits;
704}
705
706static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
707{
708 struct gfs2_inode *ip = GFS2_I(inode);
709
710 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
711 sizeof(struct gfs2_dinode);
712 iomap->offset = 0;
713 iomap->length = i_size_read(inode);
714 iomap->type = IOMAP_MAPPED;
715 iomap->flags = IOMAP_F_DATA_INLINE;
716}
717
718/**
719 * gfs2_iomap_begin - Map blocks from an inode to disk blocks
720 * @inode: The inode
721 * @pos: Starting position in bytes
722 * @length: Length to map, in bytes
723 * @flags: iomap flags
724 * @iomap: The iomap structure
725 *
726 * Returns: errno
727 */
728int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
729 unsigned flags, struct iomap *iomap)
b3b94faa 730{
feaa7bba
SW
731 struct gfs2_inode *ip = GFS2_I(inode);
732 struct gfs2_sbd *sdp = GFS2_SB(inode);
3974320c 733 struct metapath mp = { .mp_aheight = 1, };
20cdc193 734 unsigned int factor = sdp->sd_sb.sb_bsize;
ecc30c79 735 const u64 *arr = sdp->sd_heightsize;
9b8c81d1 736 __be64 *ptr;
3974320c
BP
737 sector_t lblock;
738 sector_t lend;
9b8c81d1
SW
739 int ret;
740 int eob;
741 unsigned int len;
742 struct buffer_head *bh;
743 u8 height;
7276b3b0 744
3974320c
BP
745 trace_gfs2_iomap_start(ip, pos, length, flags);
746 if (!length) {
747 ret = -EINVAL;
748 goto out;
749 }
b3b94faa 750
3974320c
BP
751 if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) {
752 gfs2_stuffed_iomap(inode, iomap);
753 if (pos >= iomap->length)
754 return -ENOENT;
755 ret = 0;
756 goto out;
757 }
758
759 lblock = pos >> inode->i_blkbits;
760 lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
761
762 iomap->offset = lblock << inode->i_blkbits;
763 iomap->addr = IOMAP_NULL_ADDR;
764 iomap->type = IOMAP_HOLE;
765 iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
766 iomap->flags = IOMAP_F_MERGED;
767 bmap_lock(ip, 0);
20cdc193
AG
768
769 /*
770 * Directory data blocks have a struct gfs2_meta_header header, so the
771 * remaining size is smaller than the filesystem block size. Logical
772 * block numbers for directories are in units of this remaining size!
773 */
ecc30c79 774 if (gfs2_is_dir(ip)) {
20cdc193 775 factor = sdp->sd_jbsize;
ecc30c79
SW
776 arr = sdp->sd_jheightsize;
777 }
4cf1ed81 778
9b8c81d1
SW
779 ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
780 if (ret)
3974320c 781 goto out_release;
b3b94faa 782
9b8c81d1 783 height = ip->i_height;
3974320c 784 while ((lblock + 1) * factor > arr[height])
9b8c81d1
SW
785 height++;
786 find_metapath(sdp, lblock, &mp, height);
9b8c81d1
SW
787 if (height > ip->i_height || gfs2_is_stuffed(ip))
788 goto do_alloc;
3974320c 789
9b8c81d1
SW
790 ret = lookup_metapath(ip, &mp);
791 if (ret < 0)
3974320c
BP
792 goto out_release;
793
5f8bd444 794 if (mp.mp_aheight != ip->i_height)
9b8c81d1 795 goto do_alloc;
3974320c 796
9b8c81d1
SW
797 ptr = metapointer(ip->i_height - 1, &mp);
798 if (*ptr == 0)
799 goto do_alloc;
3974320c
BP
800
801 iomap->type = IOMAP_MAPPED;
802 iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
803
9b8c81d1 804 bh = mp.mp_bh[ip->i_height - 1];
3974320c 805 len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
9b8c81d1 806 if (eob)
3974320c
BP
807 iomap->flags |= IOMAP_F_BOUNDARY;
808 iomap->length = (u64)len << inode->i_blkbits;
809
9b8c81d1 810 ret = 0;
3974320c
BP
811
812out_release:
9b8c81d1 813 release_metapath(&mp);
3974320c
BP
814 bmap_unlock(ip, 0);
815out:
816 trace_gfs2_iomap_end(ip, iomap, ret);
9b8c81d1 817 return ret;
30cbf189 818
9b8c81d1 819do_alloc:
3974320c
BP
820 if (!(flags & IOMAP_WRITE)) {
821 if (pos >= i_size_read(inode)) {
822 ret = -ENOENT;
823 goto out_release;
824 }
9b8c81d1 825 ret = 0;
3974320c
BP
826 iomap->length = hole_size(inode, lblock, &mp);
827 goto out_release;
b3b94faa 828 }
9b8c81d1 829
3974320c
BP
830 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
831 goto out_release;
832}
833
834/**
835 * gfs2_block_map - Map a block from an inode to a disk block
836 * @inode: The inode
837 * @lblock: The logical block number
838 * @bh_map: The bh to be mapped
839 * @create: True if its ok to alloc blocks to satify the request
840 *
841 * Sets buffer_mapped() if successful, sets buffer_boundary() if a
842 * read of metadata will be required before the next block can be
843 * mapped. Sets buffer_new() if new blocks were allocated.
844 *
845 * Returns: errno
846 */
847
848int gfs2_block_map(struct inode *inode, sector_t lblock,
849 struct buffer_head *bh_map, int create)
850{
851 struct gfs2_inode *ip = GFS2_I(inode);
852 struct iomap iomap;
853 int ret, flags = 0;
854
855 clear_buffer_mapped(bh_map);
856 clear_buffer_new(bh_map);
857 clear_buffer_boundary(bh_map);
858 trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
859
860 if (create)
861 flags |= IOMAP_WRITE;
5f8bd444 862 if (buffer_zeronew(bh_map))
3974320c
BP
863 flags |= IOMAP_ZERO;
864 ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
865 bh_map->b_size, flags, &iomap);
866 if (ret) {
867 if (!create && ret == -ENOENT) {
868 /* Return unmapped buffer beyond the end of file. */
869 ret = 0;
870 }
871 goto out;
872 }
873
874 if (iomap.length > bh_map->b_size) {
875 iomap.length = bh_map->b_size;
876 iomap.flags &= ~IOMAP_F_BOUNDARY;
5f8bd444 877 }
3974320c
BP
878 if (iomap.addr != IOMAP_NULL_ADDR)
879 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
880 bh_map->b_size = iomap.length;
881 if (iomap.flags & IOMAP_F_BOUNDARY)
882 set_buffer_boundary(bh_map);
883 if (iomap.flags & IOMAP_F_NEW)
884 set_buffer_new(bh_map);
885
886out:
887 trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
888 return ret;
fd88de56
SW
889}
890
941e6d7d
SW
891/*
892 * Deprecated: do not use in new code
893 */
fd88de56
SW
894int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
895{
23591256 896 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 897 int ret;
fd88de56
SW
898 int create = *new;
899
900 BUG_ON(!extlen);
901 BUG_ON(!dblock);
902 BUG_ON(!new);
903
47a9a527 904 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 905 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
906 *extlen = bh.b_size >> inode->i_blkbits;
907 *dblock = bh.b_blocknr;
908 if (buffer_new(&bh))
909 *new = 1;
910 else
911 *new = 0;
912 return ret;
b3b94faa
DT
913}
914
ba7f7290
SW
915/**
916 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
917 *
918 * This is partly borrowed from ext3.
919 */
ff8f33c8 920static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
ba7f7290
SW
921{
922 struct inode *inode = mapping->host;
923 struct gfs2_inode *ip = GFS2_I(inode);
09cbfeaf
KS
924 unsigned long index = from >> PAGE_SHIFT;
925 unsigned offset = from & (PAGE_SIZE-1);
ba7f7290
SW
926 unsigned blocksize, iblock, length, pos;
927 struct buffer_head *bh;
928 struct page *page;
ba7f7290
SW
929 int err;
930
220cca2a 931 page = find_or_create_page(mapping, index, GFP_NOFS);
ba7f7290
SW
932 if (!page)
933 return 0;
934
935 blocksize = inode->i_sb->s_blocksize;
936 length = blocksize - (offset & (blocksize - 1));
09cbfeaf 937 iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
ba7f7290
SW
938
939 if (!page_has_buffers(page))
940 create_empty_buffers(page, blocksize, 0);
941
942 /* Find the buffer that contains "offset" */
943 bh = page_buffers(page);
944 pos = blocksize;
945 while (offset >= pos) {
946 bh = bh->b_this_page;
947 iblock++;
948 pos += blocksize;
949 }
950
951 err = 0;
952
953 if (!buffer_mapped(bh)) {
e9e1ef2b 954 gfs2_block_map(inode, iblock, bh, 0);
ba7f7290
SW
955 /* unmapped? It's a hole - nothing to do */
956 if (!buffer_mapped(bh))
957 goto unlock;
958 }
959
960 /* Ok, it's mapped. Make sure it's up-to-date */
961 if (PageUptodate(page))
962 set_buffer_uptodate(bh);
963
964 if (!buffer_uptodate(bh)) {
965 err = -EIO;
dfec8a14 966 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
ba7f7290
SW
967 wait_on_buffer(bh);
968 /* Uhhuh. Read error. Complain and punt. */
969 if (!buffer_uptodate(bh))
970 goto unlock;
1875f2f3 971 err = 0;
ba7f7290
SW
972 }
973
bf36a713 974 if (!gfs2_is_writeback(ip))
350a9b0a 975 gfs2_trans_add_data(ip->i_gl, bh);
ba7f7290 976
eebd2aa3 977 zero_user(page, offset, length);
40bc9a27 978 mark_buffer_dirty(bh);
ba7f7290
SW
979unlock:
980 unlock_page(page);
09cbfeaf 981 put_page(page);
ba7f7290
SW
982 return err;
983}
984
c62baf65
FF
985#define GFS2_JTRUNC_REVOKES 8192
986
fa731fc4
SW
987/**
988 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
989 * @inode: The inode being truncated
990 * @oldsize: The original (larger) size
991 * @newsize: The new smaller size
992 *
993 * With jdata files, we have to journal a revoke for each block which is
994 * truncated. As a result, we need to split this into separate transactions
995 * if the number of pages being truncated gets too large.
996 */
997
fa731fc4
SW
998static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
999{
1000 struct gfs2_sbd *sdp = GFS2_SB(inode);
1001 u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1002 u64 chunk;
1003 int error;
1004
1005 while (oldsize != newsize) {
e7fdf004
AG
1006 struct gfs2_trans *tr;
1007 unsigned int offs;
1008
fa731fc4
SW
1009 chunk = oldsize - newsize;
1010 if (chunk > max_chunk)
1011 chunk = max_chunk;
e7fdf004
AG
1012
1013 offs = oldsize & ~PAGE_MASK;
1014 if (offs && chunk > PAGE_SIZE)
1015 chunk = offs + ((chunk - offs) & PAGE_MASK);
1016
7caef267 1017 truncate_pagecache(inode, oldsize - chunk);
fa731fc4 1018 oldsize -= chunk;
e7fdf004
AG
1019
1020 tr = current->journal_info;
1021 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1022 continue;
1023
fa731fc4
SW
1024 gfs2_trans_end(sdp);
1025 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1026 if (error)
1027 return error;
1028 }
1029
1030 return 0;
1031}
1032
8b5860a3 1033static int trunc_start(struct inode *inode, u64 newsize)
b3b94faa 1034{
ff8f33c8
SW
1035 struct gfs2_inode *ip = GFS2_I(inode);
1036 struct gfs2_sbd *sdp = GFS2_SB(inode);
1037 struct address_space *mapping = inode->i_mapping;
80990f40 1038 struct buffer_head *dibh = NULL;
b3b94faa 1039 int journaled = gfs2_is_jdata(ip);
8b5860a3 1040 u64 oldsize = inode->i_size;
b3b94faa
DT
1041 int error;
1042
fa731fc4
SW
1043 if (journaled)
1044 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1045 else
1046 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
b3b94faa
DT
1047 if (error)
1048 return error;
1049
1050 error = gfs2_meta_inode_buffer(ip, &dibh);
1051 if (error)
1052 goto out;
1053
350a9b0a 1054 gfs2_trans_add_meta(ip->i_gl, dibh);
ff8f33c8 1055
b3b94faa 1056 if (gfs2_is_stuffed(ip)) {
ff8f33c8 1057 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
b3b94faa 1058 } else {
ff8f33c8
SW
1059 if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) {
1060 error = gfs2_block_truncate_page(mapping, newsize);
1061 if (error)
80990f40 1062 goto out;
b3b94faa 1063 }
ff8f33c8 1064 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
b3b94faa
DT
1065 }
1066
ff8f33c8 1067 i_size_write(inode, newsize);
078cd827 1068 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
ff8f33c8 1069 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa 1070
fa731fc4
SW
1071 if (journaled)
1072 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1073 else
7caef267 1074 truncate_pagecache(inode, newsize);
fa731fc4 1075
a91ea69f 1076out:
80990f40
AG
1077 brelse(dibh);
1078 if (current->journal_info)
1079 gfs2_trans_end(sdp);
b3b94faa
DT
1080 return error;
1081}
1082
d552a2b9
BP
1083/**
1084 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1085 * @ip: inode
1086 * @rg_gh: holder of resource group glock
1087 * @mp: current metapath fully populated with buffers
1088 * @btotal: place to keep count of total blocks freed
1089 * @hgt: height we're processing
1090 * @first: true if this is the first call to this function for this height
1091 *
1092 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1093 * free, and free them all. However, we do it one rgrp at a time. If this
1094 * block has references to multiple rgrps, we break it into individual
1095 * transactions. This allows other processes to use the rgrps while we're
1096 * focused on a single one, for better concurrency / performance.
1097 * At every transaction boundary, we rewrite the inode into the journal.
1098 * That way the bitmaps are kept consistent with the inode and we can recover
1099 * if we're interrupted by power-outages.
1100 *
1101 * Returns: 0, or return code if an error occurred.
1102 * *btotal has the total number of blocks freed
1103 */
1104static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1105 const struct metapath *mp, u32 *btotal, int hgt,
1106 bool preserve1)
b3b94faa 1107{
9b8c81d1 1108 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
d552a2b9
BP
1109 struct gfs2_rgrpd *rgd;
1110 struct gfs2_trans *tr;
1111 struct buffer_head *bh = mp->mp_bh[hgt];
1112 __be64 *top, *bottom, *p;
1113 int blks_outside_rgrp;
1114 u64 bn, bstart, isize_blks;
1115 s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1116 int meta = ((hgt != ip->i_height - 1) ? 1 : 0);
1117 int ret = 0;
1118 bool buf_in_tr = false; /* buffer was added to transaction */
1119
1120 if (gfs2_metatype_check(sdp, bh,
1121 (hgt ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)))
1122 return -EIO;
1123
1124more_rgrps:
1125 blks_outside_rgrp = 0;
1126 bstart = 0;
1127 blen = 0;
1128 top = metapointer(hgt, mp); /* first ptr from metapath */
1129 /* If we're keeping some data at the truncation point, we've got to
1130 preserve the metadata tree by adding 1 to the starting metapath. */
1131 if (preserve1)
1132 top++;
1133
1134 bottom = (__be64 *)(bh->b_data + bh->b_size);
1135
1136 for (p = top; p < bottom; p++) {
1137 if (!*p)
1138 continue;
1139 bn = be64_to_cpu(*p);
1140 if (gfs2_holder_initialized(rd_gh)) {
6f6597ba 1141 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
d552a2b9
BP
1142 gfs2_assert_withdraw(sdp,
1143 gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1144 } else {
90bcab99 1145 rgd = gfs2_blk2rgrpd(sdp, bn, true);
d552a2b9
BP
1146 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1147 0, rd_gh);
1148 if (ret)
1149 goto out;
1150
1151 /* Must be done with the rgrp glock held: */
1152 if (gfs2_rs_active(&ip->i_res) &&
1153 rgd == ip->i_res.rs_rbm.rgd)
1154 gfs2_rs_deltree(&ip->i_res);
1155 }
1156
1157 if (!rgrp_contains_block(rgd, bn)) {
1158 blks_outside_rgrp++;
1159 continue;
1160 }
1161
1162 /* The size of our transactions will be unknown until we
1163 actually process all the metadata blocks that relate to
1164 the rgrp. So we estimate. We know it can't be more than
1165 the dinode's i_blocks and we don't want to exceed the
1166 journal flush threshold, sd_log_thresh2. */
1167 if (current->journal_info == NULL) {
1168 unsigned int jblocks_rqsted, revokes;
1169
1170 jblocks_rqsted = rgd->rd_length + RES_DINODE +
1171 RES_INDIRECT;
1172 isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1173 if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1174 jblocks_rqsted +=
1175 atomic_read(&sdp->sd_log_thresh2);
1176 else
1177 jblocks_rqsted += isize_blks;
1178 revokes = jblocks_rqsted;
1179 if (meta)
1180 revokes += hptrs(sdp, hgt);
1181 else if (ip->i_depth)
1182 revokes += sdp->sd_inptrs;
1183 ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1184 if (ret)
1185 goto out_unlock;
1186 down_write(&ip->i_rw_mutex);
1187 }
1188 /* check if we will exceed the transaction blocks requested */
1189 tr = current->journal_info;
1190 if (tr->tr_num_buf_new + RES_STATFS +
1191 RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1192 /* We set blks_outside_rgrp to ensure the loop will
1193 be repeated for the same rgrp, but with a new
1194 transaction. */
1195 blks_outside_rgrp++;
1196 /* This next part is tricky. If the buffer was added
1197 to the transaction, we've already set some block
1198 pointers to 0, so we better follow through and free
1199 them, or we will introduce corruption (so break).
1200 This may be impossible, or at least rare, but I
1201 decided to cover the case regardless.
1202
1203 If the buffer was not added to the transaction
1204 (this call), doing so would exceed our transaction
1205 size, so we need to end the transaction and start a
1206 new one (so goto). */
1207
1208 if (buf_in_tr)
1209 break;
1210 goto out_unlock;
1211 }
1212
1213 gfs2_trans_add_meta(ip->i_gl, bh);
1214 buf_in_tr = true;
1215 *p = 0;
1216 if (bstart + blen == bn) {
1217 blen++;
1218 continue;
1219 }
1220 if (bstart) {
1221 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1222 (*btotal) += blen;
1223 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1224 }
1225 bstart = bn;
1226 blen = 1;
1227 }
1228 if (bstart) {
1229 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1230 (*btotal) += blen;
1231 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1232 }
1233out_unlock:
1234 if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1235 outside the rgrp we just processed,
1236 do it all over again. */
1237 if (current->journal_info) {
1238 struct buffer_head *dibh = mp->mp_bh[0];
1239
1240 /* Every transaction boundary, we rewrite the dinode
1241 to keep its di_blocks current in case of failure. */
1242 ip->i_inode.i_mtime = ip->i_inode.i_ctime =
b32c8c76 1243 current_time(&ip->i_inode);
d552a2b9
BP
1244 gfs2_trans_add_meta(ip->i_gl, dibh);
1245 gfs2_dinode_out(ip, dibh->b_data);
1246 up_write(&ip->i_rw_mutex);
1247 gfs2_trans_end(sdp);
1248 }
1249 gfs2_glock_dq_uninit(rd_gh);
1250 cond_resched();
1251 goto more_rgrps;
1252 }
1253out:
1254 return ret;
1255}
1256
1257/**
1258 * find_nonnull_ptr - find a non-null pointer given a metapath and height
1259 * assumes the metapath is valid (with buffers) out to height h
1260 * @mp: starting metapath
1261 * @h: desired height to search
1262 *
1263 * Returns: true if a non-null pointer was found in the metapath buffer
1264 * false if all remaining pointers are NULL in the buffer
1265 */
1266static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1267 unsigned int h)
1268{
1269 __be64 *ptr;
1270 unsigned int ptrs = hptrs(sdp, h) - 1;
1271
1272 while (true) {
1273 ptr = metapointer(h, mp);
c4a9d189
BP
1274 if (*ptr) { /* if we have a non-null pointer */
1275 /* Now zero the metapath after the current height. */
1276 h++;
1277 if (h < GFS2_MAX_META_HEIGHT)
1278 memset(&mp->mp_list[h], 0,
1279 (GFS2_MAX_META_HEIGHT - h) *
1280 sizeof(mp->mp_list[0]));
d552a2b9 1281 return true;
c4a9d189 1282 }
d552a2b9
BP
1283
1284 if (mp->mp_list[h] < ptrs)
1285 mp->mp_list[h]++;
1286 else
1287 return false; /* no more pointers in this buffer */
1288 }
1289}
1290
/* States of the state machine driven by the loop in trunc_dealloc() */
enum dealloc_states {
	/* All buffers of the metapath are read in: strip it */
	DEALLOC_MP_FULL = 0,
	/* Lower the metapath strip height */
	DEALLOC_MP_LOWER = 1,
	/* Fill in the metapath to the given height */
	DEALLOC_FILL_MP = 2,
	/* Process complete */
	DEALLOC_DONE = 3,
};
b3b94faa 1297
c4a9d189
BP
1298static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h)
1299{
1300 if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0])))
1301 return false;
1302 return true;
1303}
1304
d552a2b9
BP
1305/**
1306 * trunc_dealloc - truncate a file down to a desired size
1307 * @ip: inode to truncate
1308 * @newsize: The desired size of the file
1309 *
1310 * This function truncates a file to newsize. It works from the
1311 * bottom up, and from the right to the left. In other words, it strips off
1312 * the highest layer (data) before stripping any of the metadata. Doing it
1313 * this way is best in case the operation is interrupted by power failure, etc.
1314 * The dinode is rewritten in every transaction to guarantee integrity.
1315 */
1316static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
1317{
1318 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1319 struct metapath mp;
1320 struct buffer_head *dibh, *bh;
1321 struct gfs2_holder rd_gh;
1322 u64 lblock;
1323 __u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
1324 unsigned int strip_h = ip->i_height - 1;
1325 u32 btotal = 0;
1326 int ret, state;
1327 int mp_h; /* metapath buffers are read in to this height */
1328 sector_t last_ra = 0;
1329 u64 prev_bnr = 0;
1330 bool preserve1; /* need to preserve the first meta pointer? */
1331
1332 if (!newsize)
b3b94faa 1333 lblock = 0;
18ec7d5c 1334 else
d552a2b9 1335 lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
b3b94faa 1336
d552a2b9 1337 memset(&mp, 0, sizeof(mp));
9b8c81d1 1338 find_metapath(sdp, lblock, &mp, ip->i_height);
b3b94faa 1339
d552a2b9
BP
1340 memcpy(&nbof, &mp.mp_list, sizeof(nbof));
1341
1342 ret = gfs2_meta_inode_buffer(ip, &dibh);
1343 if (ret)
1344 return ret;
b3b94faa 1345
d552a2b9
BP
1346 mp.mp_bh[0] = dibh;
1347 ret = lookup_metapath(ip, &mp);
1348 if (ret == ip->i_height)
1349 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1350 else
1351 state = DEALLOC_FILL_MP; /* deal with partial metapath */
b3b94faa 1352
d552a2b9
BP
1353 ret = gfs2_rindex_update(sdp);
1354 if (ret)
1355 goto out_metapath;
1356
1357 ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1358 if (ret)
1359 goto out_metapath;
1360 gfs2_holder_mark_uninitialized(&rd_gh);
1361
1362 mp_h = strip_h;
1363
1364 while (state != DEALLOC_DONE) {
1365 switch (state) {
1366 /* Truncate a full metapath at the given strip height.
1367 * Note that strip_h == mp_h in order to be in this state. */
1368 case DEALLOC_MP_FULL:
1369 if (mp_h > 0) { /* issue read-ahead on metadata */
1370 __be64 *top;
1371
1372 bh = mp.mp_bh[mp_h - 1];
1373 if (bh->b_blocknr != last_ra) {
1374 last_ra = bh->b_blocknr;
1375 top = metaptr1(mp_h - 1, &mp);
1376 gfs2_metapath_ra(ip->i_gl, bh, top);
1377 }
1378 }
1379 /* If we're truncating to a non-zero size and the mp is
1380 at the beginning of file for the strip height, we
1381 need to preserve the first metadata pointer. */
c4a9d189 1382 preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
d552a2b9
BP
1383 bh = mp.mp_bh[mp_h];
1384 gfs2_assert_withdraw(sdp, bh);
1385 if (gfs2_assert_withdraw(sdp,
1386 prev_bnr != bh->b_blocknr)) {
1387 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1388 "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1389 sdp->sd_fsname,
1390 (unsigned long long)ip->i_no_addr,
1391 prev_bnr, ip->i_height, strip_h, mp_h);
1392 }
1393 prev_bnr = bh->b_blocknr;
1394 ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
1395 mp_h, preserve1);
1396 /* If we hit an error or just swept dinode buffer,
1397 just exit. */
1398 if (ret || !mp_h) {
1399 state = DEALLOC_DONE;
1400 break;
1401 }
1402 state = DEALLOC_MP_LOWER;
1403 break;
1404
1405 /* lower the metapath strip height */
1406 case DEALLOC_MP_LOWER:
1407 /* We're done with the current buffer, so release it,
1408 unless it's the dinode buffer. Then back up to the
1409 previous pointer. */
1410 if (mp_h) {
1411 brelse(mp.mp_bh[mp_h]);
1412 mp.mp_bh[mp_h] = NULL;
1413 }
1414 /* If we can't get any lower in height, we've stripped
1415 off all we can. Next step is to back up and start
1416 stripping the previous level of metadata. */
1417 if (mp_h == 0) {
1418 strip_h--;
1419 memcpy(&mp.mp_list, &nbof, sizeof(nbof));
1420 mp_h = strip_h;
1421 state = DEALLOC_FILL_MP;
1422 break;
1423 }
1424 mp.mp_list[mp_h] = 0;
1425 mp_h--; /* search one metadata height down */
1426 if (mp.mp_list[mp_h] >= hptrs(sdp, mp_h) - 1)
1427 break; /* loop around in the same state */
1428 mp.mp_list[mp_h]++;
1429 /* Here we've found a part of the metapath that is not
1430 * allocated. We need to search at that height for the
1431 * next non-null pointer. */
1432 if (find_nonnull_ptr(sdp, &mp, mp_h)) {
1433 state = DEALLOC_FILL_MP;
1434 mp_h++;
1435 }
1436 /* No more non-null pointers at this height. Back up
1437 to the previous height and try again. */
1438 break; /* loop around in the same state */
1439
1440 /* Fill the metapath with buffers to the given height. */
1441 case DEALLOC_FILL_MP:
1442 /* Fill the buffers out to the current height. */
1443 ret = fillup_metapath(ip, &mp, mp_h);
1444 if (ret < 0)
1445 goto out;
1446
1447 /* If buffers found for the entire strip height */
1448 if ((ret == ip->i_height) && (mp_h == strip_h)) {
1449 state = DEALLOC_MP_FULL;
1450 break;
1451 }
1452 if (ret < ip->i_height) /* We have a partial height */
1453 mp_h = ret - 1;
1454
1455 /* If we find a non-null block pointer, crawl a bit
1456 higher up in the metapath and try again, otherwise
1457 we need to look lower for a new starting point. */
1458 if (find_nonnull_ptr(sdp, &mp, mp_h))
1459 mp_h++;
1460 else
1461 state = DEALLOC_MP_LOWER;
b3b94faa 1462 break;
d552a2b9 1463 }
b3b94faa
DT
1464 }
1465
d552a2b9
BP
1466 if (btotal) {
1467 if (current->journal_info == NULL) {
1468 ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1469 RES_QUOTA, 0);
1470 if (ret)
1471 goto out;
1472 down_write(&ip->i_rw_mutex);
1473 }
1474 gfs2_statfs_change(sdp, 0, +btotal, 0);
1475 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1476 ip->i_inode.i_gid);
b32c8c76 1477 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
d552a2b9
BP
1478 gfs2_trans_add_meta(ip->i_gl, dibh);
1479 gfs2_dinode_out(ip, dibh->b_data);
1480 up_write(&ip->i_rw_mutex);
1481 gfs2_trans_end(sdp);
1482 }
b3b94faa 1483
d552a2b9
BP
1484out:
1485 if (gfs2_holder_initialized(&rd_gh))
1486 gfs2_glock_dq_uninit(&rd_gh);
1487 if (current->journal_info) {
1488 up_write(&ip->i_rw_mutex);
1489 gfs2_trans_end(sdp);
1490 cond_resched();
1491 }
1492 gfs2_quota_unhold(ip);
1493out_metapath:
1494 release_metapath(&mp);
1495 return ret;
b3b94faa
DT
1496}
1497
1498static int trunc_end(struct gfs2_inode *ip)
1499{
feaa7bba 1500 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
b3b94faa
DT
1501 struct buffer_head *dibh;
1502 int error;
1503
1504 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1505 if (error)
1506 return error;
1507
1508 down_write(&ip->i_rw_mutex);
1509
1510 error = gfs2_meta_inode_buffer(ip, &dibh);
1511 if (error)
1512 goto out;
1513
a2e0f799 1514 if (!i_size_read(&ip->i_inode)) {
ecc30c79 1515 ip->i_height = 0;
ce276b06 1516 ip->i_goal = ip->i_no_addr;
b3b94faa 1517 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
45138990 1518 gfs2_ordered_del_inode(ip);
b3b94faa 1519 }
078cd827 1520 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
383f01fb 1521 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
b3b94faa 1522
350a9b0a 1523 gfs2_trans_add_meta(ip->i_gl, dibh);
539e5d6b 1524 gfs2_dinode_out(ip, dibh->b_data);
b3b94faa
DT
1525 brelse(dibh);
1526
a91ea69f 1527out:
b3b94faa 1528 up_write(&ip->i_rw_mutex);
b3b94faa 1529 gfs2_trans_end(sdp);
b3b94faa
DT
1530 return error;
1531}
1532
1533/**
1534 * do_shrink - make a file smaller
ff8f33c8 1535 * @inode: the inode
ff8f33c8 1536 * @newsize: the size to make the file
b3b94faa 1537 *
ff8f33c8
SW
1538 * Called with an exclusive lock on @inode. The @size must
1539 * be equal to or smaller than the current inode size.
b3b94faa
DT
1540 *
1541 * Returns: errno
1542 */
1543
8b5860a3 1544static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 1545{
ff8f33c8 1546 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
1547 int error;
1548
8b5860a3 1549 error = trunc_start(inode, newsize);
b3b94faa
DT
1550 if (error < 0)
1551 return error;
ff8f33c8 1552 if (gfs2_is_stuffed(ip))
b3b94faa
DT
1553 return 0;
1554
ff8f33c8
SW
1555 error = trunc_dealloc(ip, newsize);
1556 if (error == 0)
b3b94faa
DT
1557 error = trunc_end(ip);
1558
1559 return error;
1560}
1561
ff8f33c8 1562void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 1563{
ff8f33c8
SW
1564 int ret;
1565
8b5860a3 1566 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
1567 WARN_ON(ret != 0);
1568}
1569
1570/**
1571 * do_grow - Touch and update inode size
1572 * @inode: The inode
1573 * @size: The new size
1574 *
1575 * This function updates the timestamps on the inode and
1576 * may also increase the size of the inode. This function
1577 * must not be called with @size any smaller than the current
1578 * inode size.
1579 *
1580 * Although it is not strictly required to unstuff files here,
1581 * earlier versions of GFS2 have a bug in the stuffed file reading
1582 * code which will result in a buffer overrun if the size is larger
1583 * than the max stuffed file size. In order to prevent this from
25985edc 1584 * occurring, such files are unstuffed, but in other cases we can
ff8f33c8
SW
1585 * just update the inode size directly.
1586 *
1587 * Returns: 0 on success, or -ve on error
1588 */
1589
1590static int do_grow(struct inode *inode, u64 size)
1591{
1592 struct gfs2_inode *ip = GFS2_I(inode);
1593 struct gfs2_sbd *sdp = GFS2_SB(inode);
7b9cff46 1594 struct gfs2_alloc_parms ap = { .target = 1, };
a13b8c5f
WC
1595 struct buffer_head *dibh;
1596 int error;
2f7ee358 1597 int unstuff = 0;
a13b8c5f 1598
ff8f33c8
SW
1599 if (gfs2_is_stuffed(ip) &&
1600 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
b8fbf471 1601 error = gfs2_quota_lock_check(ip, &ap);
ff8f33c8 1602 if (error)
5407e242 1603 return error;
ff8f33c8 1604
7b9cff46 1605 error = gfs2_inplace_reserve(ip, &ap);
ff8f33c8
SW
1606 if (error)
1607 goto do_grow_qunlock;
2f7ee358 1608 unstuff = 1;
ff8f33c8
SW
1609 }
1610
a01aedfe
BP
1611 error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1612 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1613 0 : RES_QUOTA), 0);
a13b8c5f 1614 if (error)
ff8f33c8 1615 goto do_grow_release;
a13b8c5f 1616
2f7ee358 1617 if (unstuff) {
ff8f33c8
SW
1618 error = gfs2_unstuff_dinode(ip, NULL);
1619 if (error)
1620 goto do_end_trans;
1621 }
a13b8c5f
WC
1622
1623 error = gfs2_meta_inode_buffer(ip, &dibh);
1624 if (error)
ff8f33c8 1625 goto do_end_trans;
a13b8c5f 1626
ff8f33c8 1627 i_size_write(inode, size);
078cd827 1628 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
350a9b0a 1629 gfs2_trans_add_meta(ip->i_gl, dibh);
a13b8c5f
WC
1630 gfs2_dinode_out(ip, dibh->b_data);
1631 brelse(dibh);
1632
ff8f33c8 1633do_end_trans:
a13b8c5f 1634 gfs2_trans_end(sdp);
ff8f33c8 1635do_grow_release:
2f7ee358 1636 if (unstuff) {
ff8f33c8
SW
1637 gfs2_inplace_release(ip);
1638do_grow_qunlock:
1639 gfs2_quota_unlock(ip);
ff8f33c8 1640 }
a13b8c5f
WC
1641 return error;
1642}
1643
b3b94faa 1644/**
ff8f33c8
SW
1645 * gfs2_setattr_size - make a file a given size
1646 * @inode: the inode
1647 * @newsize: the size to make the file
b3b94faa 1648 *
ff8f33c8
SW
1649 * The file size can grow, shrink, or stay the same size. This
1650 * is called holding i_mutex and an exclusive glock on the inode
1651 * in question.
b3b94faa
DT
1652 *
1653 * Returns: errno
1654 */
1655
ff8f33c8 1656int gfs2_setattr_size(struct inode *inode, u64 newsize)
b3b94faa 1657{
af5c2697 1658 struct gfs2_inode *ip = GFS2_I(inode);
ff8f33c8 1659 int ret;
b3b94faa 1660
ff8f33c8 1661 BUG_ON(!S_ISREG(inode->i_mode));
b3b94faa 1662
ff8f33c8
SW
1663 ret = inode_newsize_ok(inode, newsize);
1664 if (ret)
1665 return ret;
b3b94faa 1666
562c72aa
CH
1667 inode_dio_wait(inode);
1668
b54e9a0b 1669 ret = gfs2_rsqa_alloc(ip);
d2b47cfb 1670 if (ret)
2b3dcf35 1671 goto out;
d2b47cfb 1672
8b5860a3 1673 if (newsize >= inode->i_size) {
2b3dcf35
BP
1674 ret = do_grow(inode, newsize);
1675 goto out;
1676 }
ff8f33c8 1677
8b5860a3 1678 ret = do_shrink(inode, newsize);
2b3dcf35 1679out:
a097dc7e 1680 gfs2_rsqa_delete(ip, NULL);
2b3dcf35 1681 return ret;
b3b94faa
DT
1682}
1683
1684int gfs2_truncatei_resume(struct gfs2_inode *ip)
1685{
1686 int error;
a2e0f799 1687 error = trunc_dealloc(ip, i_size_read(&ip->i_inode));
b3b94faa
DT
1688 if (!error)
1689 error = trunc_end(ip);
1690 return error;
1691}
1692
1693int gfs2_file_dealloc(struct gfs2_inode *ip)
1694{
1695 return trunc_dealloc(ip, 0);
1696}
1697
b50f227b
SW
1698/**
1699 * gfs2_free_journal_extents - Free cached journal bmap info
1700 * @jd: The journal
1701 *
1702 */
1703
1704void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1705{
1706 struct gfs2_journal_extent *jext;
1707
1708 while(!list_empty(&jd->extent_list)) {
1709 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1710 list_del(&jext->list);
1711 kfree(jext);
1712 }
1713}
1714
1715/**
1716 * gfs2_add_jextent - Add or merge a new extent to extent cache
1717 * @jd: The journal descriptor
1718 * @lblock: The logical block at start of new extent
c62baf65 1719 * @dblock: The physical block at start of new extent
b50f227b
SW
1720 * @blocks: Size of extent in fs blocks
1721 *
1722 * Returns: 0 on success or -ENOMEM
1723 */
1724
1725static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1726{
1727 struct gfs2_journal_extent *jext;
1728
1729 if (!list_empty(&jd->extent_list)) {
1730 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1731 if ((jext->dblock + jext->blocks) == dblock) {
1732 jext->blocks += blocks;
1733 return 0;
1734 }
1735 }
1736
1737 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1738 if (jext == NULL)
1739 return -ENOMEM;
1740 jext->dblock = dblock;
1741 jext->lblock = lblock;
1742 jext->blocks = blocks;
1743 list_add_tail(&jext->list, &jd->extent_list);
1744 jd->nr_extents++;
1745 return 0;
1746}
1747
1748/**
1749 * gfs2_map_journal_extents - Cache journal bmap info
1750 * @sdp: The super block
1751 * @jd: The journal to map
1752 *
1753 * Create a reusable "extent" mapping from all logical
1754 * blocks to all physical blocks for the given journal. This will save
1755 * us time when writing journal blocks. Most journals will have only one
1756 * extent that maps all their logical blocks. That's because gfs2.mkfs
1757 * arranges the journal blocks sequentially to maximize performance.
1758 * So the extent would map the first block for the entire file length.
1759 * However, gfs2_jadd can happen while file activity is happening, so
1760 * those journals may not be sequential. Less likely is the case where
1761 * the users created their own journals by mounting the metafs and
1762 * laying it out. But it's still possible. These journals might have
1763 * several extents.
1764 *
1765 * Returns: 0 on success, or error on failure
1766 */
1767
1768int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1769{
1770 u64 lblock = 0;
1771 u64 lblock_stop;
1772 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1773 struct buffer_head bh;
1774 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1775 u64 size;
1776 int rc;
1777
1778 lblock_stop = i_size_read(jd->jd_inode) >> shift;
1779 size = (lblock_stop - lblock) << shift;
1780 jd->nr_extents = 0;
1781 WARN_ON(!list_empty(&jd->extent_list));
1782
1783 do {
1784 bh.b_state = 0;
1785 bh.b_blocknr = 0;
1786 bh.b_size = size;
1787 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1788 if (rc || !buffer_mapped(&bh))
1789 goto fail;
1790 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1791 if (rc)
1792 goto fail;
1793 size -= bh.b_size;
1794 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1795 } while(size > 0);
1796
1797 fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1798 jd->nr_extents);
1799 return 0;
1800
1801fail:
1802 fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1803 rc, jd->jd_jid,
1804 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1805 jd->nr_extents);
1806 fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1807 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1808 bh.b_state, (unsigned long long)bh.b_size);
1809 gfs2_free_journal_extents(jd);
1810 return rc;
1811}
1812
b3b94faa
DT
1813/**
1814 * gfs2_write_alloc_required - figure out if a write will require an allocation
1815 * @ip: the file being written to
1816 * @offset: the offset to write to
1817 * @len: the number of bytes being written
b3b94faa 1818 *
461cb419 1819 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
1820 */
1821
cd915493 1822int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 1823 unsigned int len)
b3b94faa 1824{
feaa7bba 1825 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
1826 struct buffer_head bh;
1827 unsigned int shift;
1828 u64 lblock, lblock_stop, size;
7ed122e4 1829 u64 end_of_file;
b3b94faa 1830
b3b94faa
DT
1831 if (!len)
1832 return 0;
1833
1834 if (gfs2_is_stuffed(ip)) {
1835 if (offset + len >
1836 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
461cb419 1837 return 1;
b3b94faa
DT
1838 return 0;
1839 }
1840
941e6d7d 1841 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 1842 BUG_ON(gfs2_is_dir(ip));
a2e0f799 1843 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
1844 lblock = offset >> shift;
1845 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1846 if (lblock_stop > end_of_file)
461cb419 1847 return 1;
b3b94faa 1848
941e6d7d
SW
1849 size = (lblock_stop - lblock) << shift;
1850 do {
1851 bh.b_state = 0;
1852 bh.b_size = size;
1853 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1854 if (!buffer_mapped(&bh))
461cb419 1855 return 1;
941e6d7d
SW
1856 size -= bh.b_size;
1857 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1858 } while(size > 0);
b3b94faa
DT
1859
1860 return 0;
1861}
1862