iomap: add private pointer to struct iomap
[linux-2.6-block.git] / fs / gfs2 / bmap.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
b3b94faa
DT
10#include <linux/spinlock.h>
11#include <linux/completion.h>
12#include <linux/buffer_head.h>
64dd153c 13#include <linux/blkdev.h>
5c676f6d 14#include <linux/gfs2_ondisk.h>
71b86f56 15#include <linux/crc32.h>
3974320c 16#include <linux/iomap.h>
b3b94faa
DT
17
18#include "gfs2.h"
5c676f6d 19#include "incore.h"
b3b94faa
DT
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
b3b94faa 23#include "meta_io.h"
b3b94faa
DT
24#include "quota.h"
25#include "rgrp.h"
45138990 26#include "log.h"
4c16c36a 27#include "super.h"
b3b94faa 28#include "trans.h"
18ec7d5c 29#include "dir.h"
5c676f6d 30#include "util.h"
63997775 31#include "trace_gfs2.h"
b3b94faa
DT
32
33/* This doesn't need to be that large as max 64 bit pointers in a 4k
34 * block is 512, so __u16 is fine for that. It saves stack space to
35 * keep it small.
36 */
37struct metapath {
dbac6710 38 struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT];
b3b94faa 39 __u16 mp_list[GFS2_MAX_META_HEIGHT];
5f8bd444
BP
40 int mp_fheight; /* find_metapath height */
41 int mp_aheight; /* actual height (lookup height) */
b3b94faa
DT
42};
43
f25ef0c1
SW
44/**
45 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
46 * @ip: the inode
47 * @dibh: the dinode buffer
48 * @block: the block number that was allocated
ff8f33c8 49 * @page: The (optional) page. This is looked up if @page is NULL
f25ef0c1
SW
50 *
51 * Returns: errno
52 */
53
54static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
cd915493 55 u64 block, struct page *page)
f25ef0c1 56{
f25ef0c1
SW
57 struct inode *inode = &ip->i_inode;
58 struct buffer_head *bh;
59 int release = 0;
60
61 if (!page || page->index) {
220cca2a 62 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
f25ef0c1
SW
63 if (!page)
64 return -ENOMEM;
65 release = 1;
66 }
67
68 if (!PageUptodate(page)) {
69 void *kaddr = kmap(page);
602c89d2
SW
70 u64 dsize = i_size_read(inode);
71
235628c5
AG
72 if (dsize > gfs2_max_stuffed_size(ip))
73 dsize = gfs2_max_stuffed_size(ip);
f25ef0c1 74
602c89d2 75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
09cbfeaf 76 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
f25ef0c1
SW
77 kunmap(page);
78
79 SetPageUptodate(page);
80 }
81
82 if (!page_has_buffers(page))
47a9a527
FF
83 create_empty_buffers(page, BIT(inode->i_blkbits),
84 BIT(BH_Uptodate));
f25ef0c1
SW
85
86 bh = page_buffers(page);
87
88 if (!buffer_mapped(bh))
89 map_bh(bh, inode->i_sb, block);
90
91 set_buffer_uptodate(bh);
845802b1 92 if (gfs2_is_jdata(ip))
350a9b0a 93 gfs2_trans_add_data(ip->i_gl, bh);
845802b1
AG
94 else {
95 mark_buffer_dirty(bh);
96 gfs2_ordered_add_inode(ip);
97 }
f25ef0c1
SW
98
99 if (release) {
100 unlock_page(page);
09cbfeaf 101 put_page(page);
f25ef0c1
SW
102 }
103
104 return 0;
105}
106
b3b94faa
DT
107/**
108 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
109 * @ip: The GFS2 inode to unstuff
ff8f33c8 110 * @page: The (optional) page. This is looked up if the @page is NULL
b3b94faa
DT
111 *
112 * This routine unstuffs a dinode and returns it to a "normal" state such
113 * that the height can be grown in the traditional way.
114 *
115 * Returns: errno
116 */
117
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *bh, *dibh;
	struct gfs2_dinode *di;
	u64 block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (i_size_read(&ip->i_inode)) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		unsigned int n = 1;
		error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
		if (error)
			goto out_brelse;
		if (isdir) {
			/* Directories get a leaf block, not a plain data block. */
			gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
			if (error)
				goto out_brelse;
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			error = gfs2_unstuffer_page(ip, dibh, block, page);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_meta(ip->i_gl, dibh);
	di = (struct gfs2_dinode *)dibh->b_data;
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (i_size_read(&ip->i_inode)) {
		/* First (and only) pointer slot follows the dinode header. */
		*(__be64 *)(di + 1) = cpu_to_be64(block);
		gfs2_add_inode_blocks(&ip->i_inode, 1);
		di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
	}

	/* The file is now a height-1 tree, in memory and on disk. */
	ip->i_height = 1;
	di->di_height = cpu_to_be16(1);

out_brelse:
	brelse(dibh);
out:
	up_write(&ip->i_rw_mutex);
	return error;
}
176
b3b94faa
DT
177
178/**
179 * find_metapath - Find path through the metadata tree
9b8c81d1 180 * @sdp: The superblock
b3b94faa 181 * @block: The disk block to look up
07e23d68 182 * @mp: The metapath to return the result in
9b8c81d1 183 * @height: The pre-calculated height of the metadata tree
b3b94faa
DT
184 *
185 * This routine returns a struct metapath structure that defines a path
186 * through the metadata of inode "ip" to get to block "block".
187 *
188 * Example:
189 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
190 * filesystem with a blocksize of 4096.
191 *
192 * find_metapath() would return a struct metapath structure set to:
07e23d68 193 * mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
b3b94faa
DT
194 *
195 * That means that in order to get to the block containing the byte at
196 * offset 101342453, we would load the indirect block pointed to by pointer
197 * 0 in the dinode. We would then load the indirect block pointed to by
198 * pointer 48 in that indirect block. We would then load the data block
199 * pointed to by pointer 165 in that indirect block.
200 *
201 * ----------------------------------------
202 * | Dinode | |
203 * | | 4|
204 * | |0 1 2 3 4 5 9|
205 * | | 6|
206 * ----------------------------------------
207 * |
208 * |
209 * V
210 * ----------------------------------------
211 * | Indirect Block |
212 * | 5|
213 * | 4 4 4 4 4 5 5 1|
214 * |0 5 6 7 8 9 0 1 2|
215 * ----------------------------------------
216 * |
217 * |
218 * V
219 * ----------------------------------------
220 * | Indirect Block |
221 * | 1 1 1 1 1 5|
222 * | 6 6 6 6 6 1|
223 * |0 3 4 5 6 7 2|
224 * ----------------------------------------
225 * |
226 * |
227 * V
228 * ----------------------------------------
229 * | Data block containing offset |
230 * | 101342453 |
231 * | |
232 * | |
233 * ----------------------------------------
234 *
235 */
236
9b8c81d1
SW
237static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
238 struct metapath *mp, unsigned int height)
b3b94faa 239{
b3b94faa
DT
240 unsigned int i;
241
5f8bd444 242 mp->mp_fheight = height;
9b8c81d1 243 for (i = height; i--;)
7eabb77e 244 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
b3b94faa
DT
245}
246
5af4e7a0 247static inline unsigned int metapath_branch_start(const struct metapath *mp)
9b8c81d1 248{
5af4e7a0
BM
249 if (mp->mp_list[0] == 0)
250 return 2;
251 return 1;
9b8c81d1
SW
252}
253
d552a2b9 254/**
20cdc193 255 * metaptr1 - Return the first possible metadata pointer in a metapath buffer
d552a2b9
BP
256 * @height: The metadata height (0 = dinode)
257 * @mp: The metapath
258 */
259static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
260{
261 struct buffer_head *bh = mp->mp_bh[height];
262 if (height == 0)
263 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
264 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
265}
266
b3b94faa
DT
267/**
268 * metapointer - Return pointer to start of metadata in a buffer
b3b94faa
DT
269 * @height: The metadata height (0 = dinode)
270 * @mp: The metapath
271 *
272 * Return a pointer to the block number of the next height of the metadata
273 * tree given a buffer containing the pointer to the current height of the
274 * metadata tree.
275 */
276
9b8c81d1 277static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
b3b94faa 278{
d552a2b9
BP
279 __be64 *p = metaptr1(height, mp);
280 return p + mp->mp_list[height];
b3b94faa
DT
281}
282
7841b9f0
AG
283static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
284{
285 const struct buffer_head *bh = mp->mp_bh[height];
286 return (const __be64 *)(bh->b_data + bh->b_size);
287}
288
289static void clone_metapath(struct metapath *clone, struct metapath *mp)
290{
291 unsigned int hgt;
292
293 *clone = *mp;
294 for (hgt = 0; hgt < mp->mp_aheight; hgt++)
295 get_bh(clone->mp_bh[hgt]);
296}
297
5cf26b1e 298static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
b99b98dc 299{
b99b98dc
SW
300 const __be64 *t;
301
5cf26b1e 302 for (t = start; t < end; t++) {
c3ce5aa9
AG
303 struct buffer_head *rabh;
304
b99b98dc
SW
305 if (!*t)
306 continue;
307
308 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
309 if (trylock_buffer(rabh)) {
310 if (!buffer_uptodate(rabh)) {
311 rabh->b_end_io = end_buffer_read_sync;
e477b24b
CL
312 submit_bh(REQ_OP_READ,
313 REQ_RAHEAD | REQ_META | REQ_PRIO,
314 rabh);
b99b98dc
SW
315 continue;
316 }
317 unlock_buffer(rabh);
318 }
319 brelse(rabh);
320 }
321}
322
e8b43fe0
AG
323static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
324 unsigned int x, unsigned int h)
d552a2b9 325{
e8b43fe0
AG
326 for (; x < h; x++) {
327 __be64 *ptr = metapointer(x, mp);
328 u64 dblock = be64_to_cpu(*ptr);
329 int ret;
d552a2b9 330
e8b43fe0
AG
331 if (!dblock)
332 break;
333 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
334 if (ret)
335 return ret;
336 }
337 mp->mp_aheight = x + 1;
338 return 0;
d552a2b9
BP
339}
340
b3b94faa 341/**
9b8c81d1
SW
342 * lookup_metapath - Walk the metadata tree to a specific point
343 * @ip: The inode
b3b94faa 344 * @mp: The metapath
b3b94faa 345 *
9b8c81d1
SW
346 * Assumes that the inode's buffer has already been looked up and
347 * hooked onto mp->mp_bh[0] and that the metapath has been initialised
348 * by find_metapath().
349 *
350 * If this function encounters part of the tree which has not been
351 * allocated, it returns the current height of the tree at the point
352 * at which it found the unallocated block. Blocks which are found are
353 * added to the mp->mp_bh[] list.
b3b94faa 354 *
e8b43fe0 355 * Returns: error
b3b94faa
DT
356 */
357
static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
{
	/* Walk all heights from the dinode down to the leaf indirect block. */
	return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
}
362
d552a2b9
BP
363/**
364 * fillup_metapath - fill up buffers for the metadata path to a specific height
365 * @ip: The inode
366 * @mp: The metapath
367 * @h: The height to which it should be mapped
368 *
369 * Similar to lookup_metapath, but does lookups for a range of heights
370 *
c3ce5aa9 371 * Returns: error or the number of buffers filled
d552a2b9
BP
372 */
373
374static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
375{
e8b43fe0 376 unsigned int x = 0;
c3ce5aa9 377 int ret;
d552a2b9
BP
378
379 if (h) {
380 /* find the first buffer we need to look up. */
e8b43fe0
AG
381 for (x = h - 1; x > 0; x--) {
382 if (mp->mp_bh[x])
383 break;
d552a2b9
BP
384 }
385 }
c3ce5aa9
AG
386 ret = __fillup_metapath(ip, mp, x, h);
387 if (ret)
388 return ret;
389 return mp->mp_aheight - x - 1;
d552a2b9
BP
390}
391
9b8c81d1 392static inline void release_metapath(struct metapath *mp)
dbac6710
SW
393{
394 int i;
395
9b8c81d1
SW
396 for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
397 if (mp->mp_bh[i] == NULL)
398 break;
399 brelse(mp->mp_bh[i]);
400 }
11707ea0
SW
401}
402
30cbf189
SW
403/**
404 * gfs2_extent_length - Returns length of an extent of blocks
405 * @start: Start of the buffer
406 * @len: Length of the buffer in bytes
407 * @ptr: Current position in the buffer
408 * @limit: Max extent length to return (0 = unlimited)
409 * @eob: Set to 1 if we hit "end of block"
410 *
411 * If the first block is zero (unallocated) it will return the number of
412 * unallocated blocks in the extent, otherwise it will return the number
413 * of contiguous blocks in the extent.
414 *
415 * Returns: The length of the extent (minimum of one block)
416 */
417
b650738c 418static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
30cbf189
SW
419{
420 const __be64 *end = (start + len);
421 const __be64 *first = ptr;
422 u64 d = be64_to_cpu(*ptr);
423
424 *eob = 0;
425 do {
426 ptr++;
427 if (ptr >= end)
428 break;
429 if (limit && --limit == 0)
430 break;
431 if (d)
432 d++;
433 } while(be64_to_cpu(*ptr) == d);
434 if (ptr >= end)
435 *eob = 1;
436 return (ptr - first);
437}
438
7841b9f0
AG
/* Callback invoked by gfs2_walk_metadata() for each pointer range
 * [start, end) it visits; @factor is the number of data blocks addressed
 * per pointer at the current height.  Returns WALK_STOP to end the walk,
 * WALK_NEXT to keep scanning, or a pointer within the range to descend at. */
typedef const __be64 *(*gfs2_metadata_walker)(
	struct metapath *mp,
	const __be64 *start, const __be64 *end,
	u64 factor, void *data);

#define WALK_STOP ((__be64 *)0)
#define WALK_NEXT ((__be64 *)1)
446
/* Walk the metadata tree starting at the position described by @mp,
 * covering @len blocks from @lblock, invoking @walker on each pointer
 * range.  The walk moves laterally through indirect blocks, descending
 * when the walker asks for it and climbing back up when a block is
 * exhausted.  @mp is cloned on first modification so the caller's
 * metapath is left untouched. */
static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
		void *data)
{
	struct metapath clone;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	const __be64 *start, *end, *ptr;
	u64 factor = 1;
	unsigned int hgt;
	int ret = 0;

	/* Data blocks addressed per pointer at the starting height. */
	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
		factor *= sdp->sd_inptrs;

	for (;;) {
		u64 step;

		/* Walk indirect block. */
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);

		step = (end - start) * factor;
		if (step > len)
			end = start + DIV_ROUND_UP_ULL(len, factor);

		ptr = walker(mp, start, end, factor, data);
		if (ptr == WALK_STOP)
			break;
		if (step >= len)
			break;
		len -= step;
		if (ptr != WALK_NEXT) {
			/* Walker wants to descend at this pointer. */
			BUG_ON(!*ptr);
			mp->mp_list[hgt] += ptr - start;
			goto fill_up_metapath;
		}

lower_metapath:
		/* Decrease height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		brelse(mp->mp_bh[hgt]);
		mp->mp_bh[hgt] = NULL;
		if (!hgt)
			break;
		hgt--;
		factor *= sdp->sd_inptrs;

		/* Advance in metadata tree. */
		(mp->mp_list[hgt])++;
		start = metapointer(hgt, mp);
		end = metaend(hgt, mp);
		if (start >= end) {
			/* This block is exhausted too; climb further. */
			mp->mp_list[hgt] = 0;
			if (!hgt)
				break;
			goto lower_metapath;
		}

fill_up_metapath:
		/* Increase height of metapath. */
		if (mp != &clone) {
			clone_metapath(&clone, mp);
			mp = &clone;
		}
		ret = fillup_metapath(ip, mp, ip->i_height - 1);
		if (ret < 0)
			break;
		hgt += ret;
		/* factor shrinks by sd_inptrs per level descended. */
		for (; ret; ret--)
			do_div(factor, sdp->sd_inptrs);
		mp->mp_aheight = hgt + 1;
	}
	if (mp == &clone)
		release_metapath(mp);
	return ret;
}
527
528struct gfs2_hole_walker_args {
529 u64 blocks;
530};
531
532static const __be64 *gfs2_hole_walker(struct metapath *mp,
533 const __be64 *start, const __be64 *end,
534 u64 factor, void *data)
535{
536 struct gfs2_hole_walker_args *args = data;
537 const __be64 *ptr;
538
539 for (ptr = start; ptr < end; ptr++) {
540 if (*ptr) {
541 args->blocks += (ptr - start) * factor;
542 if (mp->mp_aheight == mp->mp_fheight)
543 return WALK_STOP;
544 return ptr; /* increase height */
545 }
546 }
547 args->blocks += (end - start) * factor;
548 return WALK_NEXT;
549}
550
551/**
552 * gfs2_hole_size - figure out the size of a hole
553 * @inode: The inode
554 * @lblock: The logical starting block number
555 * @len: How far to look (in blocks)
556 * @mp: The metapath at lblock
557 * @iomap: The iomap to store the hole size in
558 *
559 * This function modifies @mp.
560 *
561 * Returns: errno on error
562 */
563static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
564 struct metapath *mp, struct iomap *iomap)
565{
566 struct gfs2_hole_walker_args args = { };
567 int ret = 0;
568
569 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
570 if (!ret)
571 iomap->length = args.blocks << inode->i_blkbits;
572 return ret;
573}
574
9b8c81d1
SW
575static inline __be64 *gfs2_indirect_init(struct metapath *mp,
576 struct gfs2_glock *gl, unsigned int i,
577 unsigned offset, u64 bn)
578{
579 __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
580 ((i > 1) ? sizeof(struct gfs2_meta_header) :
581 sizeof(struct gfs2_dinode)));
582 BUG_ON(i < 1);
583 BUG_ON(mp->mp_bh[i] != NULL);
584 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
350a9b0a 585 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
9b8c81d1
SW
586 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
587 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
588 ptr += offset;
589 *ptr = cpu_to_be64(bn);
590 return ptr;
591}
592
/* States of the gfs2_iomap_alloc() state machine, run from highest to
 * lowest: grow the tree's height, then fill in indirect blocks, then
 * allocate the data blocks themselves. */
enum alloc_state {
	ALLOC_DATA = 0,		/* allocating data blocks at the leaf level */
	ALLOC_GROW_DEPTH = 1,	/* filling in missing indirect blocks */
	ALLOC_GROW_HEIGHT = 2,	/* adding new levels above the current tree */
	/* ALLOC_UNSTUFF = 3, TBD and rather complicated */
};
599
600/**
628e366d 601 * gfs2_iomap_alloc - Build a metadata tree of the requested height
9b8c81d1 602 * @inode: The GFS2 inode
628e366d
AG
603 * @iomap: The iomap structure
604 * @flags: iomap flags
5f8bd444 605 * @mp: The metapath, with proper height information calculated
9b8c81d1
SW
606 *
607 * In this routine we may have to alloc:
608 * i) Indirect blocks to grow the metadata tree height
609 * ii) Indirect blocks to fill in lower part of the metadata tree
610 * iii) Data blocks
611 *
612 * The function is in two parts. The first part works out the total
613 * number of blocks which we need. The second part does the actual
614 * allocation asking for an extent at a time (if enough contiguous free
615 * blocks are available, there will only be one request per bmap call)
616 * and uses the state machine to initialise the blocks in order.
617 *
628e366d
AG
618 * Right now, this function will allocate at most one indirect block
619 * worth of data -- with a default block size of 4K, that's slightly
620 * less than 2M. If this limitation is ever removed to allow huge
621 * allocations, we would probably still want to limit the iomap size we
622 * return to avoid stalling other tasks during huge writes; the next
623 * iomap iteration would then find the blocks already allocated.
624 *
9b8c81d1
SW
625 * Returns: errno on error
626 */
627
3974320c
BP
static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
			    unsigned flags, struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = mp->mp_bh[0];
	u64 bn;
	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
	unsigned dblks = 0;
	unsigned ptrs_per_blk;
	const unsigned end_of_metadata = mp->mp_fheight - 1;
	int ret;
	enum alloc_state state;
	__be64 *ptr;
	__be64 zero_bn = 0;
	size_t maxlen = iomap->length >> inode->i_blkbits;

	BUG_ON(mp->mp_aheight < 1);
	BUG_ON(dibh == NULL);

	gfs2_trans_add_meta(ip->i_gl, dibh);

	down_write(&ip->i_rw_mutex);

	if (mp->mp_fheight == mp->mp_aheight) {
		struct buffer_head *bh;
		int eob;

		/* Bottom indirect block exists, find unalloced extent size */
		ptr = metapointer(end_of_metadata, mp);
		bh = mp->mp_bh[end_of_metadata];
		dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
					   maxlen, &eob);
		BUG_ON(dblks < 1);
		state = ALLOC_DATA;
	} else {
		/* Need to allocate indirect blocks */
		ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
			sdp->sd_diptrs;
		/* Limit data blocks to what fits within one leaf block. */
		dblks = min(maxlen, (size_t)(ptrs_per_blk -
					     mp->mp_list[end_of_metadata]));
		if (mp->mp_fheight == ip->i_height) {
			/* Writing into existing tree, extend tree down */
			iblks = mp->mp_fheight - mp->mp_aheight;
			state = ALLOC_GROW_DEPTH;
		} else {
			/* Building up tree height */
			state = ALLOC_GROW_HEIGHT;
			iblks = mp->mp_fheight - ip->i_height;
			branch_start = metapath_branch_start(mp);
			iblks += (mp->mp_fheight - branch_start);
		}
	}

	/* start of the second part of the function (state machine) */

	blks = dblks + iblks;
	i = mp->mp_aheight;
	do {
		/* Ask for all remaining blocks; may get fewer per call. */
		n = blks - alloced;
		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
		if (ret)
			goto out;
		alloced += n;
		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
			gfs2_trans_add_unrevoke(sdp, bn, n);
		switch (state) {
		/* Growing height of tree */
		case ALLOC_GROW_HEIGHT:
			if (i == 1) {
				/* Save the dinode's first pointer; it moves
				 * down into the new top indirect block. */
				ptr = (__be64 *)(dibh->b_data +
						 sizeof(struct gfs2_dinode));
				zero_bn = *ptr;
			}
			for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
			     i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
			if (i - 1 == mp->mp_fheight - ip->i_height) {
				i--;
				gfs2_buffer_copy_tail(mp->mp_bh[i],
						sizeof(struct gfs2_meta_header),
						dibh, sizeof(struct gfs2_dinode));
				gfs2_buffer_clear_tail(dibh,
						sizeof(struct gfs2_dinode) +
						sizeof(__be64));
				ptr = (__be64 *)(mp->mp_bh[i]->b_data +
					sizeof(struct gfs2_meta_header));
				*ptr = zero_bn;
				state = ALLOC_GROW_DEPTH;
				for(i = branch_start; i < mp->mp_fheight; i++) {
					if (mp->mp_bh[i] == NULL)
						break;
					brelse(mp->mp_bh[i]);
					mp->mp_bh[i] = NULL;
				}
				i = branch_start;
			}
			if (n == 0)
				break;
			/* fall through - Branching from existing tree */
		case ALLOC_GROW_DEPTH:
			if (i > 1 && i < mp->mp_fheight)
				gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
			for (; i < mp->mp_fheight && n > 0; i++, n--)
				gfs2_indirect_init(mp, ip->i_gl, i,
						   mp->mp_list[i-1], bn++);
			if (i == mp->mp_fheight)
				state = ALLOC_DATA;
			if (n == 0)
				break;
			/* fall through - Tree complete, adding data blocks */
		case ALLOC_DATA:
			BUG_ON(n > dblks);
			BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
			gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
			dblks = n;
			ptr = metapointer(end_of_metadata, mp);
			iomap->addr = bn << inode->i_blkbits;
			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
			while (n-- > 0)
				*ptr++ = cpu_to_be64(bn++);
			break;
		}
	} while (iomap->addr == IOMAP_NULL_ADDR);

	iomap->length = (u64)dblks << inode->i_blkbits;
	ip->i_height = mp->mp_fheight;
	gfs2_add_inode_blocks(&ip->i_inode, alloced);
	gfs2_dinode_out(ip, dibh->b_data);
out:
	up_write(&ip->i_rw_mutex);
	return ret;
}
761
3974320c
BP
762static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
763{
764 struct gfs2_inode *ip = GFS2_I(inode);
765
766 iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
767 sizeof(struct gfs2_dinode);
768 iomap->offset = 0;
769 iomap->length = i_size_read(inode);
19319b53 770 iomap->type = IOMAP_INLINE;
3974320c
BP
771}
772
7ee66c03
CH
773#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
774
3974320c 775/**
628e366d 776 * gfs2_iomap_get - Map blocks from an inode to disk blocks
3974320c
BP
777 * @inode: The inode
778 * @pos: Starting position in bytes
779 * @length: Length to map, in bytes
780 * @flags: iomap flags
781 * @iomap: The iomap structure
628e366d 782 * @mp: The metapath
3974320c
BP
783 *
784 * Returns: errno
785 */
628e366d
AG
static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
			  unsigned flags, struct iomap *iomap,
			  struct metapath *mp)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	__be64 *ptr;
	sector_t lblock;
	sector_t lblock_stop;
	int ret;
	int eob;
	u64 len;
	struct buffer_head *bh;
	u8 height;

	if (!length)
		return -EINVAL;

	if (gfs2_is_stuffed(ip)) {
		if (flags & IOMAP_REPORT) {
			if (pos >= i_size_read(inode))
				return -ENOENT;
			/* NOTE(review): this early return skips the "out"
			 * label, so iomap->bdev is left unset for stuffed
			 * reports — confirm callers tolerate that. */
			gfs2_stuffed_iomap(inode, iomap);
			return 0;
		}
		/* A write to a stuffed file must unstuff via iomap_alloc. */
		BUG_ON(!(flags & IOMAP_WRITE));
	}
	lblock = pos >> inode->i_blkbits;
	iomap->offset = lblock << inode->i_blkbits;
	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
	len = lblock_stop - lblock + 1;

	down_read(&ip->i_rw_mutex);

	ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
	if (ret)
		goto unlock;

	/* Height needed to address lblock; may exceed the file's height. */
	height = ip->i_height;
	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
		height++;
	find_metapath(sdp, lblock, mp, height);
	if (height > ip->i_height || gfs2_is_stuffed(ip))
		goto do_alloc;

	ret = lookup_metapath(ip, mp);
	if (ret)
		goto unlock;

	/* Path ends early at an unallocated indirect block: a hole. */
	if (mp->mp_aheight != ip->i_height)
		goto do_alloc;

	ptr = metapointer(ip->i_height - 1, mp);
	if (*ptr == 0)
		goto do_alloc;

	bh = mp->mp_bh[ip->i_height - 1];
	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);

	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_MAPPED;
	iomap->flags = IOMAP_F_MERGED;
	if (eob)
		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;

out:
	iomap->bdev = inode->i_sb->s_bdev;
unlock:
	up_read(&ip->i_rw_mutex);
	return ret;

do_alloc:
	/* Report a hole; for FIEMAP/SEEK also work out its real extent. */
	iomap->addr = IOMAP_NULL_ADDR;
	iomap->length = len << inode->i_blkbits;
	iomap->type = IOMAP_HOLE;
	iomap->flags = 0;
	if (flags & IOMAP_REPORT) {
		loff_t size = i_size_read(inode);
		if (pos >= size)
			ret = -ENOENT;
		else if (height == ip->i_height)
			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
		else
			iomap->length = size - pos;
	}
	goto out;
}
874
628e366d
AG
875static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
876 unsigned flags, struct iomap *iomap)
877{
878 struct gfs2_inode *ip = GFS2_I(inode);
879 struct metapath mp = { .mp_aheight = 1, };
880 int ret;
881
882 trace_gfs2_iomap_start(ip, pos, length, flags);
883 if (flags & IOMAP_WRITE) {
884 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
885 if (!ret && iomap->type == IOMAP_HOLE)
886 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
887 release_metapath(&mp);
888 } else {
889 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
890 release_metapath(&mp);
891 }
892 trace_gfs2_iomap_end(ip, iomap, ret);
893 return ret;
894}
895
/* GFS2's iomap operations: mapping/allocation only, no end callback. */
const struct iomap_ops gfs2_iomap_ops = {
	.iomap_begin = gfs2_iomap_begin,
};
899
3974320c 900/**
d39d18e0 901 * gfs2_block_map - Map one or more blocks of an inode to a disk block
3974320c
BP
902 * @inode: The inode
903 * @lblock: The logical block number
904 * @bh_map: The bh to be mapped
905 * @create: True if its ok to alloc blocks to satify the request
906 *
d39d18e0
AG
907 * The size of the requested mapping is defined in bh_map->b_size.
908 *
909 * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
910 * when @lblock is not mapped. Sets buffer_mapped(bh_map) and
911 * bh_map->b_size to indicate the size of the mapping when @lblock and
912 * successive blocks are mapped, up to the requested size.
913 *
914 * Sets buffer_boundary() if a read of metadata will be required
915 * before the next block can be mapped. Sets buffer_new() if new
916 * blocks were allocated.
3974320c
BP
917 *
918 * Returns: errno
919 */
920
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t pos = (loff_t)lblock << inode->i_blkbits;
	loff_t length = bh_map->b_size;
	struct metapath mp = { .mp_aheight = 1, };
	struct iomap iomap = { };
	int ret;

	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);

	if (create) {
		/* Writes allocate holes, mirroring gfs2_iomap_begin(). */
		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
		if (!ret && iomap.type == IOMAP_HOLE)
			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
		release_metapath(&mp);
	} else {
		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
		release_metapath(&mp);

		/* Return unmapped buffer beyond the end of file. */
		if (ret == -ENOENT) {
			ret = 0;
			goto out;
		}
	}
	if (ret)
		goto out;

	/* Clamp the mapping to the buffer; a truncated extent is no
	 * longer a metadata boundary. */
	if (iomap.length > bh_map->b_size) {
		iomap.length = bh_map->b_size;
		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
	}
	if (iomap.addr != IOMAP_NULL_ADDR)
		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
	bh_map->b_size = iomap.length;
	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
		set_buffer_boundary(bh_map);
	if (iomap.flags & IOMAP_F_NEW)
		set_buffer_new(bh_map);

out:
	trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
	return ret;
}
970
941e6d7d
SW
971/*
972 * Deprecated: do not use in new code
973 */
fd88de56
SW
974int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
975{
23591256 976 struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
7a6bbacb 977 int ret;
fd88de56
SW
978 int create = *new;
979
980 BUG_ON(!extlen);
981 BUG_ON(!dblock);
982 BUG_ON(!new);
983
47a9a527 984 bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
e9e1ef2b 985 ret = gfs2_block_map(inode, lblock, &bh, create);
7a6bbacb
SW
986 *extlen = bh.b_size >> inode->i_blkbits;
987 *dblock = bh.b_blocknr;
988 if (buffer_new(&bh))
989 *new = 1;
990 else
991 *new = 0;
992 return ret;
b3b94faa
DT
993}
994
ba7f7290 995/**
bdba0d5e 996 * gfs2_block_zero_range - Deal with zeroing out data
ba7f7290
SW
997 *
998 * This is partly borrowed from ext3.
999 */
bdba0d5e
AG
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
				 unsigned int length)
{
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	unsigned long index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		/* Read the block in synchronously before modifying it. */
		err = -EIO;
		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
		err = 0;
	}

	/* Journal the buffer for jdata; otherwise order it behind the log. */
	if (gfs2_is_jdata(ip))
		gfs2_trans_add_data(ip->i_gl, bh);
	else
		gfs2_ordered_add_inode(ip);

	zero_user(page, offset, length);
	mark_buffer_dirty(bh);
unlock:
	unlock_page(page);
	put_page(page);
	return err;
}
1066
c62baf65
FF
/* Max revokes per truncate transaction for jdata files */
#define GFS2_JTRUNC_REVOKES 8192

/**
 * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 * @inode: The inode being truncated
 * @oldsize: The original (larger) size
 * @newsize: The new smaller size
 *
 * With jdata files, we have to journal a revoke for each block which is
 * truncated. As a result, we need to split this into separate transactions
 * if the number of pages being truncated gets too large.
 */

static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
{
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
	u64 chunk;
	int error;

	while (oldsize != newsize) {
		struct gfs2_trans *tr;
		unsigned int offs;

		chunk = oldsize - newsize;
		if (chunk > max_chunk)
			chunk = max_chunk;

		/* Keep chunk boundaries page-aligned after any partial page */
		offs = oldsize & ~PAGE_MASK;
		if (offs && chunk > PAGE_SIZE)
			chunk = offs + ((chunk - offs) & PAGE_MASK);

		truncate_pagecache(inode, oldsize - chunk);
		oldsize -= chunk;

		/* Only cycle the transaction if this chunk actually dirtied it */
		tr = current->journal_info;
		if (!test_bit(TR_TOUCHED, &tr->tr_flags))
			continue;

		gfs2_trans_end(sdp);
		error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
		if (error)
			return error;
	}

	return 0;
}
1114
/**
 * trunc_start - begin a shrinking truncate
 * @inode: the inode being truncated
 * @newsize: the new (smaller) file size
 *
 * Updates the on-disk size, zeroes any partial tail block, and truncates
 * the page cache. For jdata inodes the pagecache truncate goes through
 * gfs2_journaled_truncate() so revokes can be split across transactions.
 * The actual block deallocation is done later by punch_hole().
 *
 * Returns: errno
 */
static int trunc_start(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct buffer_head *dibh = NULL;
	int journaled = gfs2_is_jdata(ip);
	u64 oldsize = inode->i_size;
	int error;

	if (journaled)
		error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed data lives in the dinode itself; clear the tail */
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
	} else {
		unsigned int blocksize = i_blocksize(inode);
		unsigned int offs = newsize & (blocksize - 1);
		if (offs) {
			/* Zero the remainder of the partial last block */
			error = gfs2_block_zero_range(inode, newsize,
						      blocksize - offs);
			if (error)
				goto out;
		}
		/* Flag set so a crash mid-truncate can be resumed later */
		ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
	}

	i_size_write(inode, newsize);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_dinode_out(ip, dibh->b_data);

	if (journaled)
		error = gfs2_journaled_truncate(inode, oldsize, newsize);
	else
		truncate_pagecache(inode, newsize);

out:
	brelse(dibh);
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}
1166
628e366d
AG
1167int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
1168 struct iomap *iomap)
1169{
1170 struct metapath mp = { .mp_aheight = 1, };
1171 int ret;
1172
1173 ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
1174 if (!ret && iomap->type == IOMAP_HOLE)
1175 ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
1176 release_metapath(&mp);
1177 return ret;
1178}
1179
d552a2b9
BP
/**
 * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
 * @ip: inode
 * @rd_gh: holder of resource group glock
 * @bh: buffer head to sweep
 * @start: starting point in bh
 * @end: end point in bh
 * @meta: true if bh points to metadata (rather than data)
 * @btotal: place to keep count of total blocks freed
 *
 * We sweep a metadata buffer (provided by the metapath) for blocks we need to
 * free, and free them all. However, we do it one rgrp at a time. If this
 * block has references to multiple rgrps, we break it into individual
 * transactions. This allows other processes to use the rgrps while we're
 * focused on a single one, for better concurrency / performance.
 * At every transaction boundary, we rewrite the inode into the journal.
 * That way the bitmaps are kept consistent with the inode and we can recover
 * if we're interrupted by power-outages.
 *
 * Returns: 0, or return code if an error occurred.
 *          *btotal has the total number of blocks freed
 */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
			      struct buffer_head *bh, __be64 *start, __be64 *end,
			      bool meta, u32 *btotal)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_rgrpd *rgd;
	struct gfs2_trans *tr;
	__be64 *p;
	int blks_outside_rgrp;
	u64 bn, bstart, isize_blks;
	s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
	int ret = 0;
	bool buf_in_tr = false; /* buffer was added to transaction */

more_rgrps:
	/* Re-use the rgrp whose glock we already hold, if any */
	rgd = NULL;
	if (gfs2_holder_initialized(rd_gh)) {
		rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
		gfs2_assert_withdraw(sdp,
			     gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
	}
	blks_outside_rgrp = 0;
	bstart = 0;
	blen = 0;

	for (p = start; p < end; p++) {
		if (!*p)
			continue;
		bn = be64_to_cpu(*p);

		if (rgd) {
			/* Defer blocks belonging to other rgrps to a later pass */
			if (!rgrp_contains_block(rgd, bn)) {
				blks_outside_rgrp++;
				continue;
			}
		} else {
			rgd = gfs2_blk2rgrpd(sdp, bn, true);
			if (unlikely(!rgd)) {
				ret = -EIO;
				goto out;
			}
			ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
						 0, rd_gh);
			if (ret)
				goto out;

			/* Must be done with the rgrp glock held: */
			if (gfs2_rs_active(&ip->i_res) &&
			    rgd == ip->i_res.rs_rbm.rgd)
				gfs2_rs_deltree(&ip->i_res);
		}

		/* The size of our transactions will be unknown until we
		   actually process all the metadata blocks that relate to
		   the rgrp. So we estimate. We know it can't be more than
		   the dinode's i_blocks and we don't want to exceed the
		   journal flush threshold, sd_log_thresh2. */
		if (current->journal_info == NULL) {
			unsigned int jblocks_rqsted, revokes;

			jblocks_rqsted = rgd->rd_length + RES_DINODE +
				RES_INDIRECT;
			isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
			if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
				jblocks_rqsted +=
					atomic_read(&sdp->sd_log_thresh2);
			else
				jblocks_rqsted += isize_blks;
			revokes = jblocks_rqsted;
			if (meta)
				revokes += end - start;
			else if (ip->i_depth)
				revokes += sdp->sd_inptrs;
			ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
			if (ret)
				goto out_unlock;
			down_write(&ip->i_rw_mutex);
		}
		/* check if we will exceed the transaction blocks requested */
		tr = current->journal_info;
		if (tr->tr_num_buf_new + RES_STATFS +
		    RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
			/* We set blks_outside_rgrp to ensure the loop will
			   be repeated for the same rgrp, but with a new
			   transaction. */
			blks_outside_rgrp++;
			/* This next part is tricky. If the buffer was added
			   to the transaction, we've already set some block
			   pointers to 0, so we better follow through and free
			   them, or we will introduce corruption (so break).
			   This may be impossible, or at least rare, but I
			   decided to cover the case regardless.

			   If the buffer was not added to the transaction
			   (this call), doing so would exceed our transaction
			   size, so we need to end the transaction and start a
			   new one (so goto). */

			if (buf_in_tr)
				break;
			goto out_unlock;
		}

		gfs2_trans_add_meta(ip->i_gl, bh);
		buf_in_tr = true;
		*p = 0;
		/* Coalesce physically contiguous blocks into one free call */
		if (bstart + blen == bn) {
			blen++;
			continue;
		}
		if (bstart) {
			__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
			(*btotal) += blen;
			gfs2_add_inode_blocks(&ip->i_inode, -blen);
		}
		bstart = bn;
		blen = 1;
	}
	/* Free the final pending run, if any */
	if (bstart) {
		__gfs2_free_blocks(ip, bstart, (u32)blen, meta);
		(*btotal) += blen;
		gfs2_add_inode_blocks(&ip->i_inode, -blen);
	}
out_unlock:
	if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
					    outside the rgrp we just processed,
					    do it all over again. */
		if (current->journal_info) {
			struct buffer_head *dibh;

			ret = gfs2_meta_inode_buffer(ip, &dibh);
			if (ret)
				goto out;

			/* Every transaction boundary, we rewrite the dinode
			   to keep its di_blocks current in case of failure. */
			ip->i_inode.i_mtime = ip->i_inode.i_ctime =
				current_time(&ip->i_inode);
			gfs2_trans_add_meta(ip->i_gl, dibh);
			gfs2_dinode_out(ip, dibh->b_data);
			brelse(dibh);
			up_write(&ip->i_rw_mutex);
			gfs2_trans_end(sdp);
		}
		gfs2_glock_dq_uninit(rd_gh);
		cond_resched();
		goto more_rgrps;
	}
out:
	return ret;
}
1353
10d2cf94
AG
1354static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1355{
1356 if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1357 return false;
1358 return true;
1359}
1360
d552a2b9
BP
/**
 * find_nonnull_ptr - find a non-null pointer given a metapath and height
 * @sdp: the superblock
 * @mp: starting metapath
 * @h: desired height to search
 * @end_list: end-of-hole index list (or NULL for a truncate to EOF)
 * @end_aligned: height down to which the end position is tree-aligned
 *
 * Assumes the metapath is valid (with buffers) out to height h.
 * On success the metapath is advanced to the pointer found and the entry
 * for the next height down is reset to 0.
 *
 * Returns: true if a non-null pointer was found in the metapath buffer
 *          false if all remaining pointers are NULL in the buffer
 */
static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
			     unsigned int h,
			     __u16 *end_list, unsigned int end_aligned)
{
	struct buffer_head *bh = mp->mp_bh[h];
	__be64 *first, *ptr, *end;

	first = metaptr1(h, mp);
	ptr = first + mp->mp_list[h];
	end = (__be64 *)(bh->b_data + bh->b_size);
	/* Clip the scan at the hole's end position, if one applies here */
	if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
		bool keep_end = h < end_aligned;
		end = first + end_list[h] + keep_end;
	}

	while (ptr < end) {
		if (*ptr) { /* if we have a non-null pointer */
			mp->mp_list[h] = ptr - first;
			h++;
			if (h < GFS2_MAX_META_HEIGHT)
				mp->mp_list[h] = 0;
			return true;
		}
		ptr++;
	}
	return false;
}
1397
1398enum dealloc_states {
1399 DEALLOC_MP_FULL = 0, /* Strip a metapath with all buffers read in */
1400 DEALLOC_MP_LOWER = 1, /* lower the metapath strip height */
1401 DEALLOC_FILL_MP = 2, /* Fill in the metapath to the given height. */
1402 DEALLOC_DONE = 3, /* process complete */
1403};
b3b94faa 1404
5cf26b1e
AG
/**
 * metapointer_range - compute the in-buffer pointer range to process
 * @mp: the metapath positioned at @height
 * @height: tree height the range applies to
 * @start_list: start-of-hole index list
 * @start_aligned: height down to which the start is tree-aligned
 * @end_list: end-of-hole index list (or NULL for a truncate to EOF)
 * @end_aligned: height down to which the end is tree-aligned
 * @start: returned first pointer to process
 * @end: returned one-past-last pointer to process
 *
 * The range defaults to the whole buffer; it is clipped at the front when
 * this metapath coincides with the hole's start position, and at the back
 * when it coincides with the end position. The keep_start/keep_end bools
 * preserve the boundary pointer itself at heights that are not aligned.
 */
static inline void
metapointer_range(struct metapath *mp, int height,
		  __u16 *start_list, unsigned int start_aligned,
		  __u16 *end_list, unsigned int end_aligned,
		  __be64 **start, __be64 **end)
{
	struct buffer_head *bh = mp->mp_bh[height];
	__be64 *first;

	first = metaptr1(height, mp);
	*start = first;
	if (mp_eq_to_hgt(mp, start_list, height)) {
		bool keep_start = height < start_aligned;
		*start = first + start_list[height] + keep_start;
	}
	*end = (__be64 *)(bh->b_data + bh->b_size);
	if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
		bool keep_end = height < end_aligned;
		*end = first + end_list[height] + keep_end;
	}
}
1426
/**
 * walk_done - check if the deallocation walk is past its end at a height
 * @sdp: the superblock
 * @mp: the current metapath
 * @height: the height being checked
 * @end_list: end-of-hole index list (or NULL for a truncate to EOF)
 * @end_aligned: height down to which the end is tree-aligned
 *
 * Without an @end_list, the walk ends at the last pointer slot of the
 * buffer (sd_inptrs for indirect blocks, sd_diptrs for the dinode).
 *
 * Returns: true if mp_list[height] has reached or passed the end index
 */
static inline bool walk_done(struct gfs2_sbd *sdp,
			     struct metapath *mp, int height,
			     __u16 *end_list, unsigned int end_aligned)
{
	__u16 end;

	if (end_list) {
		bool keep_end = height < end_aligned;
		if (!mp_eq_to_hgt(mp, end_list, height))
			return false;
		end = end_list[height] + keep_end;
	} else
		end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
	return mp->mp_list[height] >= end;
}
1442
/**
 * punch_hole - deallocate blocks in a file
 * @ip: inode to truncate
 * @offset: the start of the hole
 * @length: the size of the hole (or 0 for truncate)
 *
 * Punch a hole into a file or truncate a file at a given position. This
 * function operates in whole blocks (@offset and @length are rounded
 * accordingly); partially filled blocks must be cleared otherwise.
 *
 * This function works from the bottom up, and from the right to the left. In
 * other words, it strips off the highest layer (data) before stripping any of
 * the metadata. Doing it this way is best in case the operation is interrupted
 * by power failure, etc. The dinode is rewritten in every transaction to
 * guarantee integrity.
 *
 * Implemented as a small state machine (enum dealloc_states): FILL_MP reads
 * metapath buffers down to the current strip height, MP_FULL sweeps one
 * buffer's pointer range via sweep_bh_for_rgrps(), and MP_LOWER advances the
 * walk sideways/downwards until DONE.
 */
static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	u64 maxsize = sdp->sd_heightsize[ip->i_height];
	struct metapath mp = {};
	struct buffer_head *dibh, *bh;
	struct gfs2_holder rd_gh;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
	__u16 start_list[GFS2_MAX_META_HEIGHT];
	__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
	unsigned int start_aligned, uninitialized_var(end_aligned);
	unsigned int strip_h = ip->i_height - 1;
	u32 btotal = 0;
	int ret, state;
	int mp_h; /* metapath buffers are read in to this height */
	u64 prev_bnr = 0;
	__be64 *start, *end;

	if (offset >= maxsize) {
		/*
		 * The starting point lies beyond the allocated meta-data;
		 * there are no blocks do deallocate.
		 */
		return 0;
	}

	/*
	 * The start position of the hole is defined by lblock, start_list, and
	 * start_aligned. The end position of the hole is defined by lend,
	 * end_list, and end_aligned.
	 *
	 * start_aligned and end_aligned define down to which height the start
	 * and end positions are aligned to the metadata tree (i.e., the
	 * position is a multiple of the metadata granularity at the height
	 * above). This determines at which heights additional meta pointers
	 * needs to be preserved for the remaining data.
	 */

	if (length) {
		u64 end_offset = offset + length;
		u64 lend;

		/*
		 * Clip the end at the maximum file size for the given height:
		 * that's how far the metadata goes; files bigger than that
		 * will have additional layers of indirection.
		 */
		if (end_offset > maxsize)
			end_offset = maxsize;
		lend = end_offset >> bsize_shift;

		if (lblock >= lend)
			return 0;

		find_metapath(sdp, lend, &mp, ip->i_height);
		end_list = __end_list;
		memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));

		for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
			if (end_list[mp_h])
				break;
		}
		end_aligned = mp_h;
	}

	find_metapath(sdp, lblock, &mp, ip->i_height);
	memcpy(start_list, mp.mp_list, sizeof(start_list));

	for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
		if (start_list[mp_h])
			break;
	}
	start_aligned = mp_h;

	ret = gfs2_meta_inode_buffer(ip, &dibh);
	if (ret)
		return ret;

	mp.mp_bh[0] = dibh;
	ret = lookup_metapath(ip, &mp);
	if (ret)
		goto out_metapath;

	/* issue read-ahead on metadata */
	for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
		metapointer_range(&mp, mp_h, start_list, start_aligned,
				  end_list, end_aligned, &start, &end);
		gfs2_metapath_ra(ip->i_gl, start, end);
	}

	if (mp.mp_aheight == ip->i_height)
		state = DEALLOC_MP_FULL; /* We have a complete metapath */
	else
		state = DEALLOC_FILL_MP; /* deal with partial metapath */

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_metapath;

	ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
	if (ret)
		goto out_metapath;
	gfs2_holder_mark_uninitialized(&rd_gh);

	mp_h = strip_h;

	while (state != DEALLOC_DONE) {
		switch (state) {
		/* Truncate a full metapath at the given strip height.
		 * Note that strip_h == mp_h in order to be in this state. */
		case DEALLOC_MP_FULL:
			bh = mp.mp_bh[mp_h];
			gfs2_assert_withdraw(sdp, bh);
			/* Sweeping the same block twice would mean a bug */
			if (gfs2_assert_withdraw(sdp,
						 prev_bnr != bh->b_blocknr)) {
				printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
				       "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
				       sdp->sd_fsname,
				       (unsigned long long)ip->i_no_addr,
				       prev_bnr, ip->i_height, strip_h, mp_h);
			}
			prev_bnr = bh->b_blocknr;

			if (gfs2_metatype_check(sdp, bh,
						(mp_h ? GFS2_METATYPE_IN :
							GFS2_METATYPE_DI))) {
				ret = -EIO;
				goto out;
			}

			/*
			 * Below, passing end_aligned as 0 gives us the
			 * metapointer range excluding the end point: the end
			 * point is the first metapath we must not deallocate!
			 */

			metapointer_range(&mp, mp_h, start_list, start_aligned,
					  end_list, 0 /* end_aligned */,
					  &start, &end);
			ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
						 start, end,
						 mp_h != ip->i_height - 1,
						 &btotal);

			/* If we hit an error or just swept dinode buffer,
			   just exit. */
			if (ret || !mp_h) {
				state = DEALLOC_DONE;
				break;
			}
			state = DEALLOC_MP_LOWER;
			break;

		/* lower the metapath strip height */
		case DEALLOC_MP_LOWER:
			/* We're done with the current buffer, so release it,
			   unless it's the dinode buffer. Then back up to the
			   previous pointer. */
			if (mp_h) {
				brelse(mp.mp_bh[mp_h]);
				mp.mp_bh[mp_h] = NULL;
			}
			/* If we can't get any lower in height, we've stripped
			   off all we can. Next step is to back up and start
			   stripping the previous level of metadata. */
			if (mp_h == 0) {
				strip_h--;
				memcpy(mp.mp_list, start_list, sizeof(start_list));
				mp_h = strip_h;
				state = DEALLOC_FILL_MP;
				break;
			}
			mp.mp_list[mp_h] = 0;
			mp_h--; /* search one metadata height down */
			mp.mp_list[mp_h]++;
			if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
				break;
			/* Here we've found a part of the metapath that is not
			 * allocated. We need to search at that height for the
			 * next non-null pointer. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
				state = DEALLOC_FILL_MP;
				mp_h++;
			}
			/* No more non-null pointers at this height. Back up
			   to the previous height and try again. */
			break; /* loop around in the same state */

		/* Fill the metapath with buffers to the given height. */
		case DEALLOC_FILL_MP:
			/* Fill the buffers out to the current height. */
			ret = fillup_metapath(ip, &mp, mp_h);
			if (ret < 0)
				goto out;

			/* issue read-ahead on metadata */
			if (mp.mp_aheight > 1) {
				for (; ret > 1; ret--) {
					metapointer_range(&mp, mp.mp_aheight - ret,
							  start_list, start_aligned,
							  end_list, end_aligned,
							  &start, &end);
					gfs2_metapath_ra(ip->i_gl, start, end);
				}
			}

			/* If buffers found for the entire strip height */
			if (mp.mp_aheight - 1 == strip_h) {
				state = DEALLOC_MP_FULL;
				break;
			}
			if (mp.mp_aheight < ip->i_height) /* We have a partial height */
				mp_h = mp.mp_aheight - 1;

			/* If we find a non-null block pointer, crawl a bit
			   higher up in the metapath and try again, otherwise
			   we need to look lower for a new starting point. */
			if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
				mp_h++;
			else
				state = DEALLOC_MP_LOWER;
			break;
		}
	}

	/* Account for all freed blocks: statfs, quota, and the dinode */
	if (btotal) {
		if (current->journal_info == NULL) {
			ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
					       RES_QUOTA, 0);
			if (ret)
				goto out;
			down_write(&ip->i_rw_mutex);
		}
		gfs2_statfs_change(sdp, 0, +btotal, 0);
		gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
				  ip->i_inode.i_gid);
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
		gfs2_trans_add_meta(ip->i_gl, dibh);
		gfs2_dinode_out(ip, dibh->b_data);
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
	}

out:
	if (gfs2_holder_initialized(&rd_gh))
		gfs2_glock_dq_uninit(&rd_gh);
	if (current->journal_info) {
		up_write(&ip->i_rw_mutex);
		gfs2_trans_end(sdp);
		cond_resched();
	}
	gfs2_quota_unhold(ip);
out_metapath:
	release_metapath(&mp);
	return ret;
}
1716
/**
 * trunc_end - finish a truncate
 * @ip: the inode being truncated
 *
 * Clears the GFS2_DIF_TRUNC_IN_PROG flag set by trunc_start() and, for a
 * truncate to zero, resets the inode height and allocation goal.
 *
 * Returns: errno
 */
static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!i_size_read(&ip->i_inode)) {
		/* File is now empty: revert to a height-0 (stuffed) layout */
		ip->i_height = 0;
		ip->i_goal = ip->i_no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
		gfs2_ordered_del_inode(ip);
	}
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}
1751
1752/**
1753 * do_shrink - make a file smaller
ff8f33c8 1754 * @inode: the inode
ff8f33c8 1755 * @newsize: the size to make the file
b3b94faa 1756 *
ff8f33c8
SW
1757 * Called with an exclusive lock on @inode. The @size must
1758 * be equal to or smaller than the current inode size.
b3b94faa
DT
1759 *
1760 * Returns: errno
1761 */
1762
8b5860a3 1763static int do_shrink(struct inode *inode, u64 newsize)
b3b94faa 1764{
ff8f33c8 1765 struct gfs2_inode *ip = GFS2_I(inode);
b3b94faa
DT
1766 int error;
1767
8b5860a3 1768 error = trunc_start(inode, newsize);
b3b94faa
DT
1769 if (error < 0)
1770 return error;
ff8f33c8 1771 if (gfs2_is_stuffed(ip))
b3b94faa
DT
1772 return 0;
1773
10d2cf94 1774 error = punch_hole(ip, newsize, 0);
ff8f33c8 1775 if (error == 0)
b3b94faa
DT
1776 error = trunc_end(ip);
1777
1778 return error;
1779}
1780
ff8f33c8 1781void gfs2_trim_blocks(struct inode *inode)
a13b8c5f 1782{
ff8f33c8
SW
1783 int ret;
1784
8b5860a3 1785 ret = do_shrink(inode, inode->i_size);
ff8f33c8
SW
1786 WARN_ON(ret != 0);
1787}
1788
/**
 * do_grow - Touch and update inode size
 * @inode: The inode
 * @size: The new size
 *
 * This function updates the timestamps on the inode and
 * may also increase the size of the inode. This function
 * must not be called with @size any smaller than the current
 * inode size.
 *
 * Although it is not strictly required to unstuff files here,
 * earlier versions of GFS2 have a bug in the stuffed file reading
 * code which will result in a buffer overrun if the size is larger
 * than the max stuffed file size. In order to prevent this from
 * occurring, such files are unstuffed, but in other cases we can
 * just update the inode size directly.
 *
 * Note the interleaved cleanup labels: do_grow_qunlock sits inside
 * the "if (unstuff)" block so the quota lock is only dropped when it
 * was actually taken.
 *
 * Returns: 0 on success, or -ve on error
 */

static int do_grow(struct inode *inode, u64 size)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .target = 1, };
	struct buffer_head *dibh;
	int error;
	int unstuff = 0;

	if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
		/* Growing past the stuffed limit: reserve a block to unstuff */
		error = gfs2_quota_lock_check(ip, &ap);
		if (error)
			return error;

		error = gfs2_inplace_reserve(ip, &ap);
		if (error)
			goto do_grow_qunlock;
		unstuff = 1;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
				 (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
				  0 : RES_QUOTA), 0);
	if (error)
		goto do_grow_release;

	if (unstuff) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (error)
			goto do_end_trans;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto do_end_trans;

	i_size_write(inode, size);
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
	gfs2_trans_add_meta(ip->i_gl, dibh);
	gfs2_dinode_out(ip, dibh->b_data);
	brelse(dibh);

do_end_trans:
	gfs2_trans_end(sdp);
do_grow_release:
	if (unstuff) {
		gfs2_inplace_release(ip);
do_grow_qunlock:
		gfs2_quota_unlock(ip);
	}
	return error;
}
1861
/**
 * gfs2_setattr_size - make a file a given size
 * @inode: the inode
 * @newsize: the size to make the file
 *
 * The file size can grow, shrink, or stay the same size. This
 * is called holding i_rwsem and an exclusive glock on the inode
 * in question.
 *
 * Returns: errno
 */

int gfs2_setattr_size(struct inode *inode, u64 newsize)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	BUG_ON(!S_ISREG(inode->i_mode));

	ret = inode_newsize_ok(inode, newsize);
	if (ret)
		return ret;

	/* Let any in-flight direct I/O drain before changing the size */
	inode_dio_wait(inode);

	ret = gfs2_rsqa_alloc(ip);
	if (ret)
		goto out;

	if (newsize >= inode->i_size) {
		ret = do_grow(inode, newsize);
		goto out;
	}

	ret = do_shrink(inode, newsize);
out:
	gfs2_rsqa_delete(ip, NULL);
	return ret;
}
1901
1902int gfs2_truncatei_resume(struct gfs2_inode *ip)
1903{
1904 int error;
10d2cf94 1905 error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
b3b94faa
DT
1906 if (!error)
1907 error = trunc_end(ip);
1908 return error;
1909}
1910
/* Deallocate all of the file's blocks: a hole from offset 0 to EOF. */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return punch_hole(ip, 0, 0);
}
1915
b50f227b
SW
/**
 * gfs2_free_journal_extents - Free cached journal bmap info
 * @jd: The journal
 *
 * Pops and frees every cached extent on @jd->extent_list.
 */

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{
	struct gfs2_journal_extent *jext;

	while(!list_empty(&jd->extent_list)) {
		jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
		list_del(&jext->list);
		kfree(jext);
	}
}
1932
1933/**
1934 * gfs2_add_jextent - Add or merge a new extent to extent cache
1935 * @jd: The journal descriptor
1936 * @lblock: The logical block at start of new extent
c62baf65 1937 * @dblock: The physical block at start of new extent
b50f227b
SW
1938 * @blocks: Size of extent in fs blocks
1939 *
1940 * Returns: 0 on success or -ENOMEM
1941 */
1942
1943static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1944{
1945 struct gfs2_journal_extent *jext;
1946
1947 if (!list_empty(&jd->extent_list)) {
1948 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1949 if ((jext->dblock + jext->blocks) == dblock) {
1950 jext->blocks += blocks;
1951 return 0;
1952 }
1953 }
1954
1955 jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1956 if (jext == NULL)
1957 return -ENOMEM;
1958 jext->dblock = dblock;
1959 jext->lblock = lblock;
1960 jext->blocks = blocks;
1961 list_add_tail(&jext->list, &jd->extent_list);
1962 jd->nr_extents++;
1963 return 0;
1964}
1965
/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal. This will save
 * us time when writing journal blocks. Most journals will have only one
 * extent that maps all their logical blocks. That's because gfs2.mkfs
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential. Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out. But it's still possible. These journals might have
 * several extents.
 *
 * Returns: 0 on success, or error on failure
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

	/* Map the journal one extent at a time until the whole size is covered */
	do {
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		/* A hole in a journal is fatal: it must be fully allocated */
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	gfs2_free_journal_extents(jd);
	return rc;
}
2030
b3b94faa
DT
2031/**
2032 * gfs2_write_alloc_required - figure out if a write will require an allocation
2033 * @ip: the file being written to
2034 * @offset: the offset to write to
2035 * @len: the number of bytes being written
b3b94faa 2036 *
461cb419 2037 * Returns: 1 if an alloc is required, 0 otherwise
b3b94faa
DT
2038 */
2039
cd915493 2040int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
461cb419 2041 unsigned int len)
b3b94faa 2042{
feaa7bba 2043 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
941e6d7d
SW
2044 struct buffer_head bh;
2045 unsigned int shift;
2046 u64 lblock, lblock_stop, size;
7ed122e4 2047 u64 end_of_file;
b3b94faa 2048
b3b94faa
DT
2049 if (!len)
2050 return 0;
2051
2052 if (gfs2_is_stuffed(ip)) {
235628c5 2053 if (offset + len > gfs2_max_stuffed_size(ip))
461cb419 2054 return 1;
b3b94faa
DT
2055 return 0;
2056 }
2057
941e6d7d 2058 shift = sdp->sd_sb.sb_bsize_shift;
7ed122e4 2059 BUG_ON(gfs2_is_dir(ip));
a2e0f799 2060 end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
7ed122e4
SW
2061 lblock = offset >> shift;
2062 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2063 if (lblock_stop > end_of_file)
461cb419 2064 return 1;
b3b94faa 2065
941e6d7d
SW
2066 size = (lblock_stop - lblock) << shift;
2067 do {
2068 bh.b_state = 0;
2069 bh.b_size = size;
2070 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
2071 if (!buffer_mapped(&bh))
461cb419 2072 return 1;
941e6d7d
SW
2073 size -= bh.b_size;
2074 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
2075 } while(size > 0);
b3b94faa
DT
2076
2077 return 0;
2078}
2079
4e56a641
AG
2080static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
2081{
2082 struct gfs2_inode *ip = GFS2_I(inode);
2083 struct buffer_head *dibh;
2084 int error;
2085
2086 if (offset >= inode->i_size)
2087 return 0;
2088 if (offset + length > inode->i_size)
2089 length = inode->i_size - offset;
2090
2091 error = gfs2_meta_inode_buffer(ip, &dibh);
2092 if (error)
2093 return error;
2094 gfs2_trans_add_meta(ip->i_gl, dibh);
2095 memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
2096 length);
2097 brelse(dibh);
2098 return 0;
2099}
2100
2101static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
2102 loff_t length)
2103{
2104 struct gfs2_sbd *sdp = GFS2_SB(inode);
2105 loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
2106 int error;
2107
2108 while (length) {
2109 struct gfs2_trans *tr;
2110 loff_t chunk;
2111 unsigned int offs;
2112
2113 chunk = length;
2114 if (chunk > max_chunk)
2115 chunk = max_chunk;
2116
2117 offs = offset & ~PAGE_MASK;
2118 if (offs && chunk > PAGE_SIZE)
2119 chunk = offs + ((chunk - offs) & PAGE_MASK);
2120
2121 truncate_pagecache_range(inode, offset, chunk);
2122 offset += chunk;
2123 length -= chunk;
2124
2125 tr = current->journal_info;
2126 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2127 continue;
2128
2129 gfs2_trans_end(sdp);
2130 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2131 if (error)
2132 return error;
2133 }
2134 return 0;
2135}
2136
/**
 * __gfs2_punch_hole - deallocate blocks in the middle of a file
 * @file: The file to punch the hole in
 * @offset: The start of the hole
 * @length: The length of the hole
 *
 * Zeroes any partial blocks at the edges of the hole, drops the affected
 * page cache, and (for non-stuffed files) frees the whole blocks inside
 * the range via punch_hole().
 *
 * Returns: 0 on success, or a negative errno on failure
 */
int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	int error;

	/* jdata writes data through the journal, so reserve journal space
	   and revokes up front; ordinary files only dirty the dinode. */
	if (gfs2_is_jdata(ip))
		error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
					 GFS2_JTRUNC_REVOKES);
	else
		error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed inode: the data sits inside the dinode block,
		   so the hole is punched by zeroing bytes in place. */
		error = stuffed_zero_range(inode, offset, length);
		if (error)
			goto out;
	} else {
		unsigned int start_off, end_off, blocksize;

		blocksize = i_blocksize(inode);
		start_off = offset & (blocksize - 1);
		end_off = (offset + length) & (blocksize - 1);
		if (start_off) {
			/* Zero the partial block at the head of the hole. */
			unsigned int len = length;
			if (length > blocksize - start_off)
				len = blocksize - start_off;
			error = gfs2_block_zero_range(inode, offset, len);
			if (error)
				goto out;
			if (start_off + length < blocksize)
				/* Hole fits inside a single block: the head
				   zeroing covered it, skip the tail pass. */
				end_off = 0;
		}
		if (end_off) {
			/* Zero the partial block at the tail of the hole. */
			error = gfs2_block_zero_range(inode,
				offset + length - end_off, end_off);
			if (error)
				goto out;
		}
	}

	/* Drop the page cache over the hole; jdata must do this in
	   transaction-sized chunks. */
	if (gfs2_is_jdata(ip)) {
		BUG_ON(!current->journal_info);
		gfs2_journaled_truncate_range(inode, offset, length);
	} else
		truncate_pagecache_range(inode, offset, offset + length - 1);

	file_update_time(file);
	mark_inode_dirty(inode);

	if (current->journal_info)
		gfs2_trans_end(sdp);

	/* punch_hole() frees the whole blocks inside the range and runs
	   its own transactions, so end ours first (above). */
	if (!gfs2_is_stuffed(ip))
		error = punch_hole(ip, offset, length);

out:
	/* gfs2_trans_end() clears current->journal_info, so this only
	   triggers when a transaction is still open (e.g. error paths). */
	if (current->journal_info)
		gfs2_trans_end(sdp);
	return error;
}