Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[linux-2.6-block.git] / fs / gfs2 / lops.c
CommitLineData
b3b94faa
DT
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3a8a9a10 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
b3b94faa
DT
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
e9fc2aa0 7 * of the GNU General Public License version 2.
b3b94faa
DT
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
75ca61c1 15#include <linux/mempool.h>
5c676f6d 16#include <linux/gfs2_ondisk.h>
c969f58c
SW
17#include <linux/bio.h>
18#include <linux/fs.h>
7f63257d 19#include <linux/list_sort.h>
2a5f14f2 20#include <linux/blkdev.h>
b3b94faa 21
2a5f14f2 22#include "bmap.h"
c1696fb8 23#include "dir.h"
b3b94faa 24#include "gfs2.h"
5c676f6d 25#include "incore.h"
2332c443 26#include "inode.h"
b3b94faa
DT
27#include "glock.h"
28#include "log.h"
29#include "lops.h"
30#include "meta_io.h"
31#include "recovery.h"
32#include "rgrp.h"
33#include "trans.h"
5c676f6d 34#include "util.h"
63997775 35#include "trace_gfs2.h"
b3b94faa 36
9b9107a5
SW
37/**
38 * gfs2_pin - Pin a buffer in memory
39 * @sdp: The superblock
40 * @bh: The buffer to be pinned
41 *
42 * The log lock must be held when calling this function
43 */
767f433f 44void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
9b9107a5
SW
45{
46 struct gfs2_bufdata *bd;
47
29687a2a 48 BUG_ON(!current->journal_info);
9b9107a5
SW
49
50 clear_buffer_dirty(bh);
51 if (test_set_buffer_pinned(bh))
52 gfs2_assert_withdraw(sdp, 0);
53 if (!buffer_uptodate(bh))
9e1a9ecd 54 gfs2_io_error_bh_wd(sdp, bh);
9b9107a5
SW
55 bd = bh->b_private;
56 /* If this buffer is in the AIL and it has already been written
57 * to in-place disk block, remove it from the AIL.
58 */
c618e87a 59 spin_lock(&sdp->sd_ail_lock);
16ca9412
BM
60 if (bd->bd_tr)
61 list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
c618e87a 62 spin_unlock(&sdp->sd_ail_lock);
9b9107a5 63 get_bh(bh);
5e687eac 64 atomic_inc(&sdp->sd_log_pinned);
63997775 65 trace_gfs2_pin(bd, 1);
9b9107a5
SW
66}
67
7c9ca621
BP
68static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
69{
70 return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
71}
72
73static void maybe_release_space(struct gfs2_bufdata *bd)
74{
75 struct gfs2_glock *gl = bd->bd_gl;
15562c43 76 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
6f6597ba 77 struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
7c9ca621
BP
78 unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
79 struct gfs2_bitmap *bi = rgd->rd_bits + index;
80
5a7c6690 81 if (bi->bi_clone == NULL)
7c9ca621
BP
82 return;
83 if (sdp->sd_args.ar_discard)
66fc061b 84 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
7c9ca621 85 memcpy(bi->bi_clone + bi->bi_offset,
281b4952 86 bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
7c9ca621
BP
87 clear_bit(GBF_FULL, &bi->bi_flags);
88 rgd->rd_free_clone = rgd->rd_free;
5ea5050c 89 rgd->rd_extfail_pt = rgd->rd_free;
7c9ca621
BP
90}
91
9b9107a5
SW
92/**
93 * gfs2_unpin - Unpin a buffer
94 * @sdp: the filesystem the buffer belongs to
95 * @bh: The buffer to unpin
96 * @ai:
29687a2a 97 * @flags: The inode dirty flags
9b9107a5
SW
98 *
99 */
100
101static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
16ca9412 102 struct gfs2_trans *tr)
9b9107a5
SW
103{
104 struct gfs2_bufdata *bd = bh->b_private;
105
29687a2a
SW
106 BUG_ON(!buffer_uptodate(bh));
107 BUG_ON(!buffer_pinned(bh));
9b9107a5
SW
108
109 lock_buffer(bh);
110 mark_buffer_dirty(bh);
111 clear_buffer_pinned(bh);
112
7c9ca621
BP
113 if (buffer_is_rgrp(bd))
114 maybe_release_space(bd);
115
d6a079e8 116 spin_lock(&sdp->sd_ail_lock);
16ca9412 117 if (bd->bd_tr) {
9b9107a5
SW
118 list_del(&bd->bd_ail_st_list);
119 brelse(bh);
120 } else {
121 struct gfs2_glock *gl = bd->bd_gl;
122 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
123 atomic_inc(&gl->gl_ail_count);
124 }
16ca9412
BM
125 bd->bd_tr = tr;
126 list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
d6a079e8
DC
127 spin_unlock(&sdp->sd_ail_lock);
128
29687a2a 129 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
63997775 130 trace_gfs2_pin(bd, 0);
9b9107a5 131 unlock_buffer(bh);
5e687eac 132 atomic_dec(&sdp->sd_log_pinned);
9b9107a5
SW
133}
134
e8c92ed7 135static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
16615be1 136{
e8c92ed7
SW
137 BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
138 (sdp->sd_log_flush_head != sdp->sd_log_head));
139
722f6f62 140 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
e8c92ed7 141 sdp->sd_log_flush_head = 0;
16615be1
SW
142}
143
c1696fb8 144u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
16615be1 145{
e8c92ed7
SW
146 unsigned int lbn = sdp->sd_log_flush_head;
147 struct gfs2_journal_extent *je;
148 u64 block;
149
b50f227b
SW
150 list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
151 if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
e8c92ed7
SW
152 block = je->dblock + lbn - je->lblock;
153 gfs2_log_incr_head(sdp);
154 return block;
155 }
156 }
157
158 return -1;
16615be1
SW
159}
160
e8c92ed7
SW
161/**
162 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
163 * @sdp: The superblock
164 * @bvec: The bio_vec
165 * @error: The i/o status
166 *
4519eaad 167 * This finds the relevant buffers and unlocks them and sets the
e8c92ed7
SW
168 * error flag according to the status of the i/o request. This is
169 * used when the log is writing data which has an in-place version
170 * that is pinned in the pagecache.
171 */
172
173static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
4e4cbee9 174 blk_status_t error)
16615be1 175{
e8c92ed7
SW
176 struct buffer_head *bh, *next;
177 struct page *page = bvec->bv_page;
178 unsigned size;
179
180 bh = page_buffers(page);
181 size = bvec->bv_len;
182 while (bh_offset(bh) < bvec->bv_offset)
183 bh = bh->b_this_page;
184 do {
185 if (error)
87354e5d 186 mark_buffer_write_io_error(bh);
e8c92ed7
SW
187 unlock_buffer(bh);
188 next = bh->b_this_page;
189 size -= bh->b_size;
190 brelse(bh);
191 bh = next;
192 } while(bh && size);
16615be1
SW
193}
194
47ac5537 195/**
e8c92ed7
SW
196 * gfs2_end_log_write - end of i/o to the log
197 * @bio: The bio
e8c92ed7
SW
198 *
199 * Each bio_vec contains either data from the pagecache or data
200 * relating to the log itself. Here we iterate over the bio_vec
201 * array, processing both kinds of data.
47ac5537
SW
202 *
203 */
204
4246a0b6 205static void gfs2_end_log_write(struct bio *bio)
47ac5537 206{
e8c92ed7
SW
207 struct gfs2_sbd *sdp = bio->bi_private;
208 struct bio_vec *bvec;
209 struct page *page;
210 int i;
211
942b0cdd
BP
212 if (bio->bi_status) {
213 fs_err(sdp, "Error %d writing to journal, jid=%u\n",
214 bio->bi_status, sdp->sd_jdesc->jd_jid);
215 wake_up(&sdp->sd_logd_waitq);
216 }
e8c92ed7 217
e97e548b 218 bio_for_each_segment_all(bvec, bio, i) {
e8c92ed7
SW
219 page = bvec->bv_page;
220 if (page_has_buffers(page))
4e4cbee9 221 gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
e8c92ed7
SW
222 else
223 mempool_free(page, gfs2_page_pool);
224 }
47ac5537 225
e8c92ed7 226 bio_put(bio);
47ac5537
SW
227 if (atomic_dec_and_test(&sdp->sd_log_in_flight))
228 wake_up(&sdp->sd_log_flush_wait);
229}
230
231/**
5b846095
AD
232 * gfs2_log_submit_bio - Submit any pending log bio
233 * @biop: Address of the bio pointer
2a5f14f2 234 * @opf: REQ_OP | op_flags
47ac5537 235 *
e8c92ed7
SW
236 * Submit any pending part-built or full bio to the block device. If
237 * there is no pending bio, then this is a no-op.
47ac5537
SW
238 */
239
2a5f14f2 240void gfs2_log_submit_bio(struct bio **biop, int opf)
47ac5537 241{
5b846095
AD
242 struct bio *bio = *biop;
243 if (bio) {
244 struct gfs2_sbd *sdp = bio->bi_private;
e8c92ed7 245 atomic_inc(&sdp->sd_log_in_flight);
2a5f14f2 246 bio->bi_opf = opf;
5b846095
AD
247 submit_bio(bio);
248 *biop = NULL;
e8c92ed7
SW
249 }
250}
47ac5537 251
e8c92ed7 252/**
5b846095
AD
253 * gfs2_log_alloc_bio - Allocate a bio
254 * @sdp: The super block
255 * @blkno: The device block number we want to write to
256 * @end_io: The bi_end_io callback
e8c92ed7 257 *
5b846095 258 * Allocate a new bio, initialize it with the given parameters and return it.
e8c92ed7 259 *
5b846095 260 * Returns: The newly allocated bio
e8c92ed7
SW
261 */
262
5b846095
AD
263static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
264 bio_end_io_t *end_io)
e8c92ed7
SW
265{
266 struct super_block *sb = sdp->sd_vfs;
5b846095 267 struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
e8c92ed7 268
4f024f37 269 bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
74d46992 270 bio_set_dev(bio, sb->s_bdev);
5b846095 271 bio->bi_end_io = end_io;
e8c92ed7
SW
272 bio->bi_private = sdp;
273
e8c92ed7 274 return bio;
47ac5537
SW
275}
276
277/**
e8c92ed7 278 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
5b846095 279 * @sdp: The super block
e8c92ed7 280 * @blkno: The device block number we want to write to
5b846095
AD
281 * @bio: The bio to get or allocate
282 * @op: REQ_OP
283 * @end_io: The bi_end_io callback
284 * @flush: Always flush the current bio and allocate a new one?
e8c92ed7
SW
285 *
286 * If there is a cached bio, then if the next block number is sequential
287 * with the previous one, return it, otherwise flush the bio to the
5b846095 288 * device. If there is no cached bio, or we just flushed it, then
e8c92ed7 289 * allocate a new one.
47ac5537 290 *
e8c92ed7 291 * Returns: The bio to use for log writes
47ac5537
SW
292 */
293
5b846095
AD
294static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
295 struct bio **biop, int op,
296 bio_end_io_t *end_io, bool flush)
47ac5537 297{
5b846095 298 struct bio *bio = *biop;
e8c92ed7
SW
299
300 if (bio) {
5b846095
AD
301 u64 nblk;
302
f73a1c7d 303 nblk = bio_end_sector(bio);
e8c92ed7 304 nblk >>= sdp->sd_fsb2bb_shift;
5b846095 305 if (blkno == nblk && !flush)
e8c92ed7 306 return bio;
2a5f14f2 307 gfs2_log_submit_bio(biop, op);
e8c92ed7
SW
308 }
309
5b846095
AD
310 *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
311 return *biop;
47ac5537
SW
312}
313
314/**
e8c92ed7 315 * gfs2_log_write - write to log
47ac5537 316 * @sdp: the filesystem
e8c92ed7
SW
317 * @page: the page to write
318 * @size: the size of the data to write
319 * @offset: the offset within the page
c1696fb8 320 * @blkno: block number of the log entry
47ac5537 321 *
e8c92ed7
SW
322 * Try and add the page segment to the current bio. If that fails,
323 * submit the current bio to the device and create a new one, and
324 * then add the page segment to that.
47ac5537
SW
325 */
326
c1696fb8
BP
327void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
328 unsigned size, unsigned offset, u64 blkno)
47ac5537 329{
e8c92ed7
SW
330 struct bio *bio;
331 int ret;
332
5b846095
AD
333 bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
334 gfs2_end_log_write, false);
e8c92ed7
SW
335 ret = bio_add_page(bio, page, size, offset);
336 if (ret == 0) {
5b846095
AD
337 bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
338 REQ_OP_WRITE, gfs2_end_log_write, true);
e8c92ed7
SW
339 ret = bio_add_page(bio, page, size, offset);
340 WARN_ON(ret == 0);
341 }
342}
47ac5537 343
e8c92ed7
SW
344/**
345 * gfs2_log_write_bh - write a buffer's content to the log
346 * @sdp: The super block
347 * @bh: The buffer pointing to the in-place location
348 *
349 * This writes the content of the buffer to the next available location
350 * in the log. The buffer will be unlocked once the i/o to the log has
351 * completed.
352 */
353
354static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
355{
c1696fb8
BP
356 gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
357 gfs2_log_bmap(sdp));
e8c92ed7 358}
47ac5537 359
e8c92ed7
SW
360/**
361 * gfs2_log_write_page - write one block stored in a page, into the log
362 * @sdp: The superblock
363 * @page: The struct page
364 *
365 * This writes the first block-sized part of the page into the log. Note
366 * that the page must have been allocated from the gfs2_page_pool mempool
367 * and that after this has been called, ownership has been transferred and
368 * the page may be freed at any time.
369 */
47ac5537 370
e8c92ed7
SW
371void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
372{
373 struct super_block *sb = sdp->sd_vfs;
c1696fb8
BP
374 gfs2_log_write(sdp, page, sb->s_blocksize, 0,
375 gfs2_log_bmap(sdp));
47ac5537 376}
16615be1 377
2a5f14f2
AD
378/**
379 * gfs2_end_log_read - end I/O callback for reads from the log
380 * @bio: The bio
381 *
382 * Simply unlock the pages in the bio. The main thread will wait on them and
383 * process them in order as necessary.
384 */
385
386static void gfs2_end_log_read(struct bio *bio)
387{
388 struct page *page;
389 struct bio_vec *bvec;
390 int i;
391
392 bio_for_each_segment_all(bvec, bio, i) {
393 page = bvec->bv_page;
394 if (bio->bi_status) {
395 int err = blk_status_to_errno(bio->bi_status);
396
397 SetPageError(page);
398 mapping_set_error(page->mapping, err);
399 }
400 unlock_page(page);
401 }
402
403 bio_put(bio);
404}
405
406/**
407 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
408 * @jd: The journal descriptor
409 * @page: The page to look in
410 *
411 * Returns: 1 if found, 0 otherwise.
412 */
413
414static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
415 struct gfs2_log_header_host *head,
416 struct page *page)
417{
418 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
419 struct gfs2_log_header_host uninitialized_var(lh);
420 void *kaddr = kmap_atomic(page);
421 unsigned int offset;
422 bool ret = false;
423
424 for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
425 if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
426 if (lh.lh_sequence > head->lh_sequence)
427 *head = lh;
428 else {
429 ret = true;
430 break;
431 }
432 }
433 }
434 kunmap_atomic(kaddr);
435 return ret;
436}
437
438/**
439 * gfs2_jhead_process_page - Search/cleanup a page
440 * @jd: The journal descriptor
441 * @index: Index of the page to look into
442 * @done: If set, perform only cleanup, else search and set if found.
443 *
444 * Find the page with 'index' in the journal's mapping. Search the page for
445 * the journal head if requested (cleanup == false). Release refs on the
446 * page so the page cache can reclaim it (put_page() twice). We grabbed a
447 * reference on this page two times, first when we did a find_or_create_page()
448 * to obtain the page to add it to the bio and second when we do a
449 * find_get_page() here to get the page to wait on while I/O on it is being
450 * completed.
451 * This function is also used to free up a page we might've grabbed but not
452 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
453 * submitted the I/O, but we already found the jhead so we only need to drop
454 * our references to the page.
455 */
456
457static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
458 struct gfs2_log_header_host *head,
459 bool *done)
460{
461 struct page *page;
462
463 page = find_get_page(jd->jd_inode->i_mapping, index);
464 wait_on_page_locked(page);
465
466 if (PageError(page))
467 *done = true;
468
469 if (!*done)
470 *done = gfs2_jhead_pg_srch(jd, head, page);
471
472 put_page(page); /* Once for find_get_page */
473 put_page(page); /* Once more for find_or_create_page */
474}
475
476/**
477 * gfs2_find_jhead - find the head of a log
478 * @jd: The journal descriptor
479 * @head: The log descriptor for the head of the log is returned here
480 *
481 * Do a search of a journal by reading it in large chunks using bios and find
482 * the valid log entry with the highest sequence number. (i.e. the log head)
483 *
484 * Returns: 0 on success, errno otherwise
485 */
486
487int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
488{
489 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
490 struct address_space *mapping = jd->jd_inode->i_mapping;
491 struct gfs2_journal_extent *je;
492 u32 block, read_idx = 0, submit_idx = 0, index = 0;
493 int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
494 int blocks_per_page = 1 << shift, sz, ret = 0;
495 struct bio *bio = NULL;
496 struct page *page;
497 bool done = false;
498 errseq_t since;
499
500 memset(head, 0, sizeof(*head));
501 if (list_empty(&jd->extent_list))
502 gfs2_map_journal_extents(sdp, jd);
503
504 since = filemap_sample_wb_err(mapping);
505 list_for_each_entry(je, &jd->extent_list, list) {
506 for (block = 0; block < je->blocks; block += blocks_per_page) {
507 index = (je->lblock + block) >> shift;
508
509 page = find_or_create_page(mapping, index, GFP_NOFS);
510 if (!page) {
511 ret = -ENOMEM;
512 done = true;
513 goto out;
514 }
515
516 if (bio) {
517 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
518 if (sz == PAGE_SIZE)
519 goto page_added;
520 submit_idx = index;
521 submit_bio(bio);
522 bio = NULL;
523 }
524
525 bio = gfs2_log_alloc_bio(sdp,
526 je->dblock + (index << shift),
527 gfs2_end_log_read);
528 bio->bi_opf = REQ_OP_READ;
529 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
530 gfs2_assert_warn(sdp, sz == PAGE_SIZE);
531
532page_added:
533 if (submit_idx <= read_idx + BIO_MAX_PAGES) {
534 /* Keep at least one bio in flight */
535 continue;
536 }
537
538 gfs2_jhead_process_page(jd, read_idx++, head, &done);
539 if (done)
540 goto out; /* found */
541 }
542 }
543
544out:
545 if (bio)
546 submit_bio(bio);
547 while (read_idx <= index)
548 gfs2_jhead_process_page(jd, read_idx++, head, &done);
549
550 if (!ret)
551 ret = filemap_check_wb_err(mapping, since);
552
553 return ret;
554}
555
dad30e90
SW
556static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
557 u32 ld_length, u32 ld_data1)
16615be1 558{
144a4c2f 559 struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
e8c92ed7
SW
560 struct gfs2_log_descriptor *ld = page_address(page);
561 clear_page(ld);
16615be1
SW
562 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
563 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
564 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
565 ld->ld_type = cpu_to_be32(ld_type);
dad30e90
SW
566 ld->ld_length = cpu_to_be32(ld_length);
567 ld->ld_data1 = cpu_to_be32(ld_data1);
16615be1 568 ld->ld_data2 = 0;
e8c92ed7 569 return page;
16615be1
SW
570}
571
dad30e90
SW
572static void gfs2_check_magic(struct buffer_head *bh)
573{
574 void *kaddr;
575 __be32 *ptr;
576
577 clear_buffer_escaped(bh);
578 kaddr = kmap_atomic(bh->b_page);
579 ptr = kaddr + bh_offset(bh);
580 if (*ptr == cpu_to_be32(GFS2_MAGIC))
581 set_buffer_escaped(bh);
582 kunmap_atomic(kaddr);
583}
584
7f63257d
BM
585static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
586{
587 struct gfs2_bufdata *bda, *bdb;
588
589 bda = list_entry(a, struct gfs2_bufdata, bd_list);
590 bdb = list_entry(b, struct gfs2_bufdata, bd_list);
591
592 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
593 return -1;
594 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
595 return 1;
596 return 0;
597}
598
dad30e90
SW
599static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
600 unsigned int total, struct list_head *blist,
601 bool is_databuf)
b3b94faa 602{
b3b94faa
DT
603 struct gfs2_log_descriptor *ld;
604 struct gfs2_bufdata *bd1 = NULL, *bd2;
e8c92ed7 605 struct page *page;
b3b94faa
DT
606 unsigned int num;
607 unsigned n;
608 __be64 *ptr;
609
905d2aef 610 gfs2_log_lock(sdp);
7f63257d 611 list_sort(NULL, blist, blocknr_cmp);
c0752aa7 612 bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
b3b94faa
DT
613 while(total) {
614 num = total;
615 if (total > limit)
616 num = limit;
905d2aef 617 gfs2_log_unlock(sdp);
4a586812
BP
618 page = gfs2_get_log_desc(sdp,
619 is_databuf ? GFS2_LOG_DESC_JDATA :
620 GFS2_LOG_DESC_METADATA, num + 1, num);
e8c92ed7 621 ld = page_address(page);
905d2aef 622 gfs2_log_lock(sdp);
e8c92ed7 623 ptr = (__be64 *)(ld + 1);
b3b94faa
DT
624
625 n = 0;
c0752aa7 626 list_for_each_entry_continue(bd1, blist, bd_list) {
b3b94faa 627 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
dad30e90
SW
628 if (is_databuf) {
629 gfs2_check_magic(bd1->bd_bh);
630 *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
631 }
b3b94faa
DT
632 if (++n >= num)
633 break;
634 }
635
905d2aef 636 gfs2_log_unlock(sdp);
e8c92ed7 637 gfs2_log_write_page(sdp, page);
905d2aef 638 gfs2_log_lock(sdp);
b3b94faa
DT
639
640 n = 0;
c0752aa7 641 list_for_each_entry_continue(bd2, blist, bd_list) {
16615be1 642 get_bh(bd2->bd_bh);
905d2aef 643 gfs2_log_unlock(sdp);
16615be1 644 lock_buffer(bd2->bd_bh);
dad30e90
SW
645
646 if (buffer_escaped(bd2->bd_bh)) {
647 void *kaddr;
648 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
649 ptr = page_address(page);
650 kaddr = kmap_atomic(bd2->bd_bh->b_page);
651 memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
652 bd2->bd_bh->b_size);
653 kunmap_atomic(kaddr);
654 *(__be32 *)ptr = 0;
655 clear_buffer_escaped(bd2->bd_bh);
656 unlock_buffer(bd2->bd_bh);
657 brelse(bd2->bd_bh);
658 gfs2_log_write_page(sdp, page);
659 } else {
660 gfs2_log_write_bh(sdp, bd2->bd_bh);
661 }
905d2aef 662 gfs2_log_lock(sdp);
b3b94faa
DT
663 if (++n >= num)
664 break;
665 }
666
905d2aef 667 BUG_ON(total < num);
b3b94faa
DT
668 total -= num;
669 }
905d2aef 670 gfs2_log_unlock(sdp);
b3b94faa
DT
671}
672
d69a3c65 673static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
dad30e90
SW
674{
675 unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
022ef4fe 676 unsigned int nbuf;
d69a3c65
SW
677 if (tr == NULL)
678 return;
022ef4fe
SW
679 nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
680 gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
dad30e90
SW
681}
682
16ca9412 683static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
b3b94faa 684{
d69a3c65 685 struct list_head *head;
b3b94faa
DT
686 struct gfs2_bufdata *bd;
687
d69a3c65 688 if (tr == NULL)
16ca9412 689 return;
16ca9412 690
d69a3c65 691 head = &tr->tr_buf;
b3b94faa 692 while (!list_empty(head)) {
c0752aa7
BP
693 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
694 list_del_init(&bd->bd_list);
16ca9412 695 gfs2_unpin(sdp, bd->bd_bh, tr);
b3b94faa 696 }
b3b94faa
DT
697}
698
699static void buf_lo_before_scan(struct gfs2_jdesc *jd,
55167622 700 struct gfs2_log_header_host *head, int pass)
b3b94faa 701{
b3b94faa
DT
702 if (pass != 0)
703 return;
704
a17d758b
BP
705 jd->jd_found_blocks = 0;
706 jd->jd_replayed_blocks = 0;
b3b94faa
DT
707}
708
709static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
710 struct gfs2_log_descriptor *ld, __be64 *ptr,
711 int pass)
712{
feaa7bba
SW
713 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
714 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
5c676f6d 715 struct gfs2_glock *gl = ip->i_gl;
b3b94faa
DT
716 unsigned int blks = be32_to_cpu(ld->ld_data1);
717 struct buffer_head *bh_log, *bh_ip;
cd915493 718 u64 blkno;
b3b94faa
DT
719 int error = 0;
720
721 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
722 return 0;
723
e1cb6be9 724 gfs2_replay_incr_blk(jd, &start);
b3b94faa 725
e1cb6be9 726 for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
b3b94faa
DT
727 blkno = be64_to_cpu(*ptr++);
728
a17d758b 729 jd->jd_found_blocks++;
b3b94faa 730
a17d758b 731 if (gfs2_revoke_check(jd, blkno, start))
b3b94faa
DT
732 continue;
733
734 error = gfs2_replay_read_block(jd, start, &bh_log);
82ffa516
SW
735 if (error)
736 return error;
b3b94faa
DT
737
738 bh_ip = gfs2_meta_new(gl, blkno);
739 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
740
741 if (gfs2_meta_check(sdp, bh_ip))
742 error = -EIO;
743 else
744 mark_buffer_dirty(bh_ip);
745
746 brelse(bh_log);
747 brelse(bh_ip);
748
749 if (error)
750 break;
751
a17d758b 752 jd->jd_replayed_blocks++;
b3b94faa
DT
753 }
754
755 return error;
756}
757
7c0ef28a
SW
758/**
759 * gfs2_meta_sync - Sync all buffers associated with a glock
760 * @gl: The glock
761 *
762 */
763
764static void gfs2_meta_sync(struct gfs2_glock *gl)
765{
766 struct address_space *mapping = gfs2_glock2aspace(gl);
15562c43 767 struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
7c0ef28a
SW
768 int error;
769
70d4ee94
SW
770 if (mapping == NULL)
771 mapping = &sdp->sd_aspace;
772
7c0ef28a
SW
773 filemap_fdatawrite(mapping);
774 error = filemap_fdatawait(mapping);
775
776 if (error)
15562c43 777 gfs2_io_error(gl->gl_name.ln_sbd);
7c0ef28a
SW
778}
779
b3b94faa
DT
780static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
781{
feaa7bba
SW
782 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
783 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
784
785 if (error) {
7276b3b0 786 gfs2_meta_sync(ip->i_gl);
b3b94faa
DT
787 return;
788 }
789 if (pass != 1)
790 return;
791
7276b3b0 792 gfs2_meta_sync(ip->i_gl);
b3b94faa
DT
793
794 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
a17d758b 795 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
b3b94faa
DT
796}
797
d69a3c65 798static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
b3b94faa 799{
b3b94faa 800 struct gfs2_meta_header *mh;
b3b94faa
DT
801 unsigned int offset;
802 struct list_head *head = &sdp->sd_log_le_revoke;
82e86087 803 struct gfs2_bufdata *bd;
e8c92ed7 804 struct page *page;
dad30e90 805 unsigned int length;
b3b94faa 806
5d054964 807 gfs2_write_revokes(sdp);
b3b94faa
DT
808 if (!sdp->sd_log_num_revoke)
809 return;
810
dad30e90
SW
811 length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
812 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
b3b94faa
DT
813 offset = sizeof(struct gfs2_log_descriptor);
814
c0752aa7 815 list_for_each_entry(bd, head, bd_list) {
b3b94faa
DT
816 sdp->sd_log_num_revoke--;
817
cd915493 818 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
b3b94faa 819
e8c92ed7
SW
820 gfs2_log_write_page(sdp, page);
821 page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
822 mh = page_address(page);
823 clear_page(mh);
b3b94faa 824 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
e3167ded
SW
825 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
826 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
b3b94faa
DT
827 offset = sizeof(struct gfs2_meta_header);
828 }
829
e8c92ed7 830 *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
cd915493 831 offset += sizeof(u64);
b3b94faa
DT
832 }
833 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
834
e8c92ed7 835 gfs2_log_write_page(sdp, page);
b3b94faa
DT
836}
837
16ca9412 838static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
f42ab085
SW
839{
840 struct list_head *head = &sdp->sd_log_le_revoke;
841 struct gfs2_bufdata *bd;
842 struct gfs2_glock *gl;
843
844 while (!list_empty(head)) {
c0752aa7
BP
845 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
846 list_del_init(&bd->bd_list);
f42ab085
SW
847 gl = bd->bd_gl;
848 atomic_dec(&gl->gl_revokes);
849 clear_bit(GLF_LFLUSH, &gl->gl_flags);
850 kmem_cache_free(gfs2_bufdata_cachep, bd);
851 }
852}
853
b3b94faa 854static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
55167622 855 struct gfs2_log_header_host *head, int pass)
b3b94faa 856{
b3b94faa
DT
857 if (pass != 0)
858 return;
859
a17d758b
BP
860 jd->jd_found_revokes = 0;
861 jd->jd_replay_tail = head->lh_tail;
b3b94faa
DT
862}
863
864static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
865 struct gfs2_log_descriptor *ld, __be64 *ptr,
866 int pass)
867{
feaa7bba 868 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
869 unsigned int blks = be32_to_cpu(ld->ld_length);
870 unsigned int revokes = be32_to_cpu(ld->ld_data1);
871 struct buffer_head *bh;
872 unsigned int offset;
cd915493 873 u64 blkno;
b3b94faa
DT
874 int first = 1;
875 int error;
876
877 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
878 return 0;
879
880 offset = sizeof(struct gfs2_log_descriptor);
881
e1cb6be9 882 for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
b3b94faa
DT
883 error = gfs2_replay_read_block(jd, start, &bh);
884 if (error)
885 return error;
886
887 if (!first)
888 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
889
cd915493 890 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
b3b94faa
DT
891 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
892
a17d758b 893 error = gfs2_revoke_add(jd, blkno, start);
3ad62e87
BP
894 if (error < 0) {
895 brelse(bh);
b3b94faa 896 return error;
3ad62e87 897 }
b3b94faa 898 else if (error)
a17d758b 899 jd->jd_found_revokes++;
b3b94faa
DT
900
901 if (!--revokes)
902 break;
cd915493 903 offset += sizeof(u64);
b3b94faa
DT
904 }
905
906 brelse(bh);
907 offset = sizeof(struct gfs2_meta_header);
908 first = 0;
909 }
910
911 return 0;
912}
913
914static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
915{
feaa7bba 916 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
b3b94faa
DT
917
918 if (error) {
a17d758b 919 gfs2_revoke_clean(jd);
b3b94faa
DT
920 return;
921 }
922 if (pass != 1)
923 return;
924
925 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
a17d758b 926 jd->jd_jid, jd->jd_found_revokes);
b3b94faa 927
a17d758b 928 gfs2_revoke_clean(jd);
b3b94faa
DT
929}
930
16615be1
SW
931/**
932 * databuf_lo_before_commit - Scan the data buffers, writing as we go
933 *
934 */
935
d69a3c65 936static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
16615be1 937{
022ef4fe
SW
938 unsigned int limit = databuf_limit(sdp);
939 unsigned int nbuf;
d69a3c65
SW
940 if (tr == NULL)
941 return;
022ef4fe
SW
942 nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
943 gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
18ec7d5c
SW
944}
945
946static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
947 struct gfs2_log_descriptor *ld,
948 __be64 *ptr, int pass)
949{
feaa7bba 950 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
5c676f6d 951 struct gfs2_glock *gl = ip->i_gl;
18ec7d5c
SW
952 unsigned int blks = be32_to_cpu(ld->ld_data1);
953 struct buffer_head *bh_log, *bh_ip;
cd915493
SW
954 u64 blkno;
955 u64 esc;
18ec7d5c
SW
956 int error = 0;
957
958 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
959 return 0;
960
e1cb6be9
BP
961 gfs2_replay_incr_blk(jd, &start);
962 for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
18ec7d5c
SW
963 blkno = be64_to_cpu(*ptr++);
964 esc = be64_to_cpu(*ptr++);
965
a17d758b 966 jd->jd_found_blocks++;
18ec7d5c 967
a17d758b 968 if (gfs2_revoke_check(jd, blkno, start))
18ec7d5c
SW
969 continue;
970
971 error = gfs2_replay_read_block(jd, start, &bh_log);
972 if (error)
973 return error;
974
975 bh_ip = gfs2_meta_new(gl, blkno);
976 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
977
978 /* Unescape */
979 if (esc) {
980 __be32 *eptr = (__be32 *)bh_ip->b_data;
981 *eptr = cpu_to_be32(GFS2_MAGIC);
982 }
983 mark_buffer_dirty(bh_ip);
984
985 brelse(bh_log);
986 brelse(bh_ip);
18ec7d5c 987
a17d758b 988 jd->jd_replayed_blocks++;
18ec7d5c
SW
989 }
990
991 return error;
992}
993
994/* FIXME: sort out accounting for log blocks etc. */
995
996static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
997{
feaa7bba
SW
998 struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
999 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
18ec7d5c
SW
1000
1001 if (error) {
7276b3b0 1002 gfs2_meta_sync(ip->i_gl);
18ec7d5c
SW
1003 return;
1004 }
1005 if (pass != 1)
1006 return;
1007
1008 /* data sync? */
7276b3b0 1009 gfs2_meta_sync(ip->i_gl);
18ec7d5c
SW
1010
1011 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
a17d758b 1012 jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
18ec7d5c
SW
1013}
1014
16ca9412 1015static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
18ec7d5c 1016{
d69a3c65 1017 struct list_head *head;
18ec7d5c
SW
1018 struct gfs2_bufdata *bd;
1019
d69a3c65 1020 if (tr == NULL)
16ca9412 1021 return;
16ca9412 1022
d69a3c65 1023 head = &tr->tr_databuf;
18ec7d5c 1024 while (!list_empty(head)) {
c0752aa7
BP
1025 bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
1026 list_del_init(&bd->bd_list);
16ca9412 1027 gfs2_unpin(sdp, bd->bd_bh, tr);
18ec7d5c 1028 }
b3b94faa
DT
1029}
1030
18ec7d5c 1031
b09e593d 1032const struct gfs2_log_operations gfs2_buf_lops = {
b3b94faa
DT
1033 .lo_before_commit = buf_lo_before_commit,
1034 .lo_after_commit = buf_lo_after_commit,
1035 .lo_before_scan = buf_lo_before_scan,
1036 .lo_scan_elements = buf_lo_scan_elements,
1037 .lo_after_scan = buf_lo_after_scan,
ea67eedb 1038 .lo_name = "buf",
b3b94faa
DT
1039};
1040
b09e593d 1041const struct gfs2_log_operations gfs2_revoke_lops = {
b3b94faa 1042 .lo_before_commit = revoke_lo_before_commit,
f42ab085 1043 .lo_after_commit = revoke_lo_after_commit,
b3b94faa
DT
1044 .lo_before_scan = revoke_lo_before_scan,
1045 .lo_scan_elements = revoke_lo_scan_elements,
1046 .lo_after_scan = revoke_lo_after_scan,
ea67eedb 1047 .lo_name = "revoke",
b3b94faa
DT
1048};
1049
b09e593d 1050const struct gfs2_log_operations gfs2_databuf_lops = {
b3b94faa 1051 .lo_before_commit = databuf_lo_before_commit,
18ec7d5c
SW
1052 .lo_after_commit = databuf_lo_after_commit,
1053 .lo_scan_elements = databuf_lo_scan_elements,
1054 .lo_after_scan = databuf_lo_after_scan,
ea67eedb 1055 .lo_name = "databuf",
b3b94faa
DT
1056};
1057
b09e593d 1058const struct gfs2_log_operations *gfs2_log_ops[] = {
16615be1 1059 &gfs2_databuf_lops,
b3b94faa 1060 &gfs2_buf_lops,
16615be1 1061 &gfs2_revoke_lops,
ea67eedb 1062 NULL,
b3b94faa
DT
1063};
1064