// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/commit.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * Journal commit routines for the generic filesystem journaling code;
 * part of the ext2fs journaling system.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/bitops.h>
#include <trace/events/jbd2.h>

/*
 * IO end handler for temporary buffer_heads handling writes to the journal.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *orig_bh = bh->b_private;

	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	if (orig_bh) {
		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
		smp_mb__after_atomic();
		wake_up_bit(&orig_bh->b_state, BH_Shadow);
	}
	unlock_buffer(bh);
}
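
/*
 * Note: the clear_bit_unlock() + smp_mb__after_atomic() + wake_up_bit()
 * sequence above pairs with code that sleeps on BH_Shadow (for instance
 * the wait in do_get_write_access()), so the barrier is required for the
 * waker/waiter handshake rather than being merely advisory.
 */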

/*
 * When an ext4 file is truncated, it is possible that some pages are not
 * successfully freed, because they are attached to a committing transaction.
 * After the transaction commits, these pages are left on the LRU, with no
 * ->mapping, and with attached buffers. These pages are trivially reclaimable
 * by the VM, but their apparent absence upsets the VM accounting, and it makes
 * the numbers in /proc/meminfo look odd.
 *
 * So here, we have a buffer which has just come off the forget list. Look to
 * see if we can strip all buffers from the backing page.
 *
 * Called under lock_journal(), and possibly under journal_datalist_lock. The
 * caller provided us with a ref against the buffer, and we drop that here.
 */
static void release_buffer_page(struct buffer_head *bh)
{
	struct folio *folio;

	if (buffer_dirty(bh))
		goto nope;
	if (atomic_read(&bh->b_count) != 1)
		goto nope;
	folio = bh->b_folio;
	if (folio->mapping)
		goto nope;

	/* OK, it's a truncated page */
	if (!folio_trylock(folio))
		goto nope;

	folio_get(folio);
	__brelse(bh);
	try_to_free_buffers(folio);
	folio_unlock(folio);
	folio_put(folio);
	return;

nope:
	__brelse(bh);
}

static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
{
	struct commit_header *h;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	h = (struct commit_header *)(bh->b_data);
	h->h_chksum_type = 0;
	h->h_chksum_size = 0;
	h->h_chksum[0] = 0;
	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
	h->h_chksum[0] = cpu_to_be32(csum);
}
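
/*
 * The checksum fields are zeroed before the checksum is computed over the
 * whole block, so a verifier can recompute it the same way: zero the field,
 * checksum j_blocksize bytes, then compare against the stored value.
 */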

/*
 * Done it all: now submit the commit record. We should have
 * cleaned up our previous buffers by now, so if we are in abort
 * mode we can now just skip the rest of the journal write
 * entirely.
 *
 * Returns 1 if the journal needs to be aborted or 0 on success
 */
static int journal_submit_commit_record(journal_t *journal,
					transaction_t *commit_transaction,
					struct buffer_head **cbh,
					__u32 crc32_sum)
{
	struct commit_header *tmp;
	struct buffer_head *bh;
	struct timespec64 now;
	blk_opf_t write_flags = REQ_OP_WRITE | REQ_SYNC;

	*cbh = NULL;

	if (is_journal_aborted(journal))
		return 0;

	bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
						JBD2_COMMIT_BLOCK);
	if (!bh)
		return 1;

	tmp = (struct commit_header *)bh->b_data;
	ktime_get_coarse_real_ts64(&now);
	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);

	if (jbd2_has_feature_checksum(journal)) {
		tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
		tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
		tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
	}
	jbd2_commit_block_csum_set(journal, bh);

	BUFFER_TRACE(bh, "submit commit block");
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	bh->b_end_io = journal_end_buffer_io_sync;

	if (journal->j_flags & JBD2_BARRIER &&
	    !jbd2_has_feature_async_commit(journal))
		write_flags |= REQ_PREFLUSH | REQ_FUA;

	submit_bh(write_flags, bh);
	*cbh = bh;
	return 0;
}
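
/*
 * With JBD2_BARRIER set and no async-commit feature, the commit block above
 * goes out with REQ_PREFLUSH | REQ_FUA: the device cache is flushed before
 * the write and the commit block itself is forced to stable storage, which
 * is what makes the transaction durable once this IO completes.
 */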

/*
 * This function, together with journal_submit_commit_record(), allows the
 * commit record to be written asynchronously.
 */
static int journal_wait_on_commit_record(journal_t *journal,
					 struct buffer_head *bh)
{
	int ret = 0;

	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (unlikely(!buffer_uptodate(bh)))
		ret = -EIO;
	put_bh(bh);		/* One for getblk() */

	return ret;
}

/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
		return 0;

	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
	return journal->j_submit_inode_data_buffers(jinode);
}
EXPORT_SYMBOL(jbd2_submit_inode_data);

int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
	    !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
		return 0;
	return filemap_fdatawait_range_keep_errors(
		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
		jinode->i_dirty_end);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
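
/*
 * The two exported helpers above let a filesystem (in practice the ext4
 * fast-commit path) submit and wait on a single inode's data outside a full
 * commit, using the same callbacks and dirty-range bookkeeping as the commit
 * code below.
 */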

/*
 * Submit all the data buffers of inodes associated with the transaction to
 * disk.
 *
 * We are in a committing transaction. Therefore no new inode can be added to
 * our inode list. We use the JI_COMMIT_RUNNING flag to protect the inode we
 * are currently operating on from being released while we write out pages.
 */
static int journal_submit_data_buffers(journal_t *journal,
		transaction_t *commit_transaction)
{
	struct jbd2_inode *jinode;
	int err, ret = 0;

	spin_lock(&journal->j_list_lock);
	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
		if (!(jinode->i_flags & JI_WRITE_DATA))
			continue;
		jinode->i_flags |= JI_COMMIT_RUNNING;
		spin_unlock(&journal->j_list_lock);
		/* submit the inode data buffers. */
		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
		if (journal->j_submit_inode_data_buffers) {
			err = journal->j_submit_inode_data_buffers(jinode);
			if (!ret)
				ret = err;
		}
		spin_lock(&journal->j_list_lock);
		J_ASSERT(jinode->i_transaction == commit_transaction);
		jinode->i_flags &= ~JI_COMMIT_RUNNING;
		smp_mb();
		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
	}
	spin_unlock(&journal->j_list_lock);
	return ret;
}
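
/*
 * JI_COMMIT_RUNNING works like a bit-lock: the flag is set under
 * j_list_lock, and anyone wanting to tear down the jbd2_inode (e.g.
 * jbd2_journal_release_jbd_inode()) sleeps on __JI_COMMIT_RUNNING until the
 * wake_up_bit() above. The smp_mb() orders the flag clear against the
 * waitqueue check, mirroring the BH_Shadow handshake at the top of the file.
 */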

int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;

	return filemap_fdatawait_range_keep_errors(mapping,
						   jinode->i_dirty_start,
						   jinode->i_dirty_end);
}
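
/*
 * This helper is used as the default ->j_finish_inode_data_buffers callback
 * (a filesystem may install its own); note it only waits for and collects
 * errors from writeback that was started earlier, it submits nothing itself.
 */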

/*
 * Wait for data submitted for writeout, refile inodes to the proper
 * transaction if needed.
 */
static int journal_finish_inode_data_buffers(journal_t *journal,
		transaction_t *commit_transaction)
{
	struct jbd2_inode *jinode, *next_i;
	int err, ret = 0;

	/* For locking, see the comment in journal_submit_data_buffers() */
	spin_lock(&journal->j_list_lock);
	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
		if (!(jinode->i_flags & JI_WAIT_DATA))
			continue;
		jinode->i_flags |= JI_COMMIT_RUNNING;
		spin_unlock(&journal->j_list_lock);
		/* wait for the inode data buffers writeout. */
		if (journal->j_finish_inode_data_buffers) {
			err = journal->j_finish_inode_data_buffers(jinode);
			if (!ret)
				ret = err;
		}
		spin_lock(&journal->j_list_lock);
		jinode->i_flags &= ~JI_COMMIT_RUNNING;
		smp_mb();
		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
	}

	/* Now refile inodes to the proper lists */
	list_for_each_entry_safe(jinode, next_i,
				 &commit_transaction->t_inode_list, i_list) {
		list_del(&jinode->i_list);
		if (jinode->i_next_transaction) {
			jinode->i_transaction = jinode->i_next_transaction;
			jinode->i_next_transaction = NULL;
			list_add(&jinode->i_list,
				 &jinode->i_transaction->t_inode_list);
		} else {
			jinode->i_transaction = NULL;
			jinode->i_dirty_start = 0;
			jinode->i_dirty_end = 0;
		}
	}
	spin_unlock(&journal->j_list_lock);

	return ret;
}

static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
	struct page *page = bh->b_page;
	char *addr;
	__u32 checksum;

	addr = kmap_atomic(page);
	checksum = crc32_be(crc32_sum,
		(void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
	kunmap_atomic(addr);

	return checksum;
}
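
/*
 * This running crc32 covers every block written to the journal and feeds the
 * commit record's h_chksum[] when the older JBD2 checksum feature is
 * enabled; it is separate from the per-tag csum2/csum3 checksums computed in
 * jbd2_block_tag_csum_set() below.
 */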

static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
				   unsigned long long block)
{
	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
	if (jbd2_has_feature_64bit(j))
		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
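
/*
 * The high half of a 64-bit block number is written as (block >> 31) >> 1
 * rather than block >> 32; the split shift appears to be a historical idiom
 * to avoid an undefined (or warned-about) 32-bit shift on configurations
 * where the block number type is only 32 bits wide.
 */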

static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
				    struct buffer_head *bh, __u32 sequence)
{
	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
	struct page *page = bh->b_page;
	__u8 *addr;
	__u32 csum32;
	__be32 seq;

	if (!jbd2_journal_has_csum_v2or3(j))
		return;

	seq = cpu_to_be32(sequence);
	addr = kmap_atomic(page);
	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
	csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
			     bh->b_size);
	kunmap_atomic(addr);

	if (jbd2_has_feature_csum3(j))
		tag3->t_checksum = cpu_to_be32(csum32);
	else
		tag->t_checksum = cpu_to_be16(csum32);
}
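
/*
 * csum3 keeps the full 32-bit checksum in journal_block_tag3_t, while the
 * older csum2 format truncates it to the 16-bit t_checksum field, hence the
 * cpu_to_be16() in the fallback branch above.
 */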

/*
 * jbd2_journal_commit_transaction
 *
 * The primary function for committing a transaction to the log. This
 * function is called by the journal thread to begin a complete commit.
 */
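/*
 * Rough shape of a commit, as traced by the jbd2_debug() phase markers in
 * the body below: lock down the running transaction and wait for updates
 * (phase 1), write data and revoke records (phase 2), write metadata via
 * shadow buffers and descriptor blocks and wait for that IO (phases 3-4),
 * write and wait on the commit record (phase 5), process the forget list
 * and move buffers to the checkpoint lists (phase 6), then update the
 * statistics and wake waiters (phase 7).
 */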
void jbd2_journal_commit_transaction(journal_t *journal)
{
	struct transaction_stats_s stats;
	transaction_t *commit_transaction;
	struct journal_head *jh;
	struct buffer_head *descriptor;
	struct buffer_head **wbuf = journal->j_wbuf;
	int bufs;
	int flags;
	int err;
	unsigned long long blocknr;
	ktime_t start_time;
	u64 commit_time;
	char *tagp = NULL;
	journal_block_tag_t *tag = NULL;
	int space_left = 0;
	int first_tag = 0;
	int tag_flag;
	int i;
	int tag_bytes = journal_tag_bytes(journal);
	struct buffer_head *cbh = NULL; /* For transactional checksums */
	__u32 crc32_sum = ~0;
	struct blk_plug plug;
	/* Tail of the journal */
	unsigned long first_block;
	tid_t first_tid;
	int update_tail;
	int csum_size = 0;
	LIST_HEAD(io_bufs);
	LIST_HEAD(log_bufs);

	if (jbd2_journal_has_csum_v2or3(journal))
		csum_size = sizeof(struct jbd2_journal_block_tail);

	/*
	 * First job: lock down the current transaction and wait for
	 * all outstanding updates to complete.
	 */

	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
	if (journal->j_flags & JBD2_FLUSHED) {
		jbd2_debug(3, "super block updated\n");
		mutex_lock_io(&journal->j_checkpoint_mutex);
		/*
		 * We hold j_checkpoint_mutex so the tail cannot change under
		 * us. We don't need any special data guarantees for writing
		 * the sb since the journal is empty and it is OK for the
		 * write to be flushed only by the transaction commit.
		 */
		jbd2_journal_update_sb_log_tail(journal,
						journal->j_tail_sequence,
						journal->j_tail,
						REQ_SYNC);
		mutex_unlock(&journal->j_checkpoint_mutex);
	} else {
		jbd2_debug(3, "superblock not updated\n");
	}

	J_ASSERT(journal->j_running_transaction != NULL);
	J_ASSERT(journal->j_committing_transaction == NULL);

	write_lock(&journal->j_state_lock);
	journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
	while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_fc_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		write_unlock(&journal->j_state_lock);
		schedule();
		write_lock(&journal->j_state_lock);
		finish_wait(&journal->j_fc_wait, &wait);
		/*
		 * TODO: by blocking fast commits here, we are increasing
		 * fsync() latency slightly. Strictly speaking, we don't need
		 * to block fast commits until the transaction enters T_FLUSH
		 * state. So an optimization is possible where we block new
		 * fast commits here and wait for existing ones to complete
		 * just before we enter T_FLUSH. That way, the existing fast
		 * commits and this full commit can proceed in parallel.
		 */
	}
	write_unlock(&journal->j_state_lock);

	commit_transaction = journal->j_running_transaction;

	trace_jbd2_start_commit(journal, commit_transaction);
	jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
			commit_transaction->t_tid);

	write_lock(&journal->j_state_lock);
	journal->j_fc_off = 0;
	J_ASSERT(commit_transaction->t_state == T_RUNNING);
	commit_transaction->t_state = T_LOCKED;

	trace_jbd2_commit_locking(journal, commit_transaction);
	stats.run.rs_wait = commit_transaction->t_max_wait;
	stats.run.rs_request_delay = 0;
	stats.run.rs_locked = jiffies;
	if (commit_transaction->t_requested)
		stats.run.rs_request_delay =
			jbd2_time_diff(commit_transaction->t_requested,
				       stats.run.rs_locked);
	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
					      stats.run.rs_locked);

	/* Wait for any outstanding t_updates to finish. */
	jbd2_journal_wait_updates(journal);

	commit_transaction->t_state = T_SWITCH;

	J_ASSERT(atomic_read(&commit_transaction->t_outstanding_credits) <=
			journal->j_max_transaction_buffers);

	/*
	 * First thing we are allowed to do is to discard any remaining
	 * BJ_Reserved buffers. Note, it is _not_ permissible to assume
	 * that there are no such buffers: if a large filesystem
	 * operation like a truncate needs to split itself over multiple
	 * transactions, then it may try to do a jbd2_journal_restart() while
	 * there are still BJ_Reserved buffers outstanding. These must
	 * be released cleanly from the current transaction.
	 *
	 * In this case, the filesystem must still reserve write access
	 * again before modifying the buffer in the new transaction, but
	 * we do not require it to remember exactly which old buffers it
	 * has reserved. This is consistent with the existing behaviour
	 * that multiple jbd2_journal_get_write_access() calls to the same
	 * buffer are perfectly permissible.
	 * We use journal->j_state_lock here to serialize processing of
	 * t_reserved_list with eviction of buffers from journal_unmap_buffer().
	 */
	while (commit_transaction->t_reserved_list) {
		jh = commit_transaction->t_reserved_list;
		JBUFFER_TRACE(jh, "reserved, unused: refile");
		/*
		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer()
		 * pair may leave undo-committed data.
		 */
		if (jh->b_committed_data) {
			struct buffer_head *bh = jh2bh(jh);

			spin_lock(&jh->b_state_lock);
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			spin_unlock(&jh->b_state_lock);
		}
		jbd2_journal_refile_buffer(journal, jh);
	}

	write_unlock(&journal->j_state_lock);
	/*
	 * Now try to drop any written-back buffers from the journal's
	 * checkpoint lists. We do this *before* commit because it potentially
	 * frees some memory.
	 */
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_clean_checkpoint_list(journal, false);
	spin_unlock(&journal->j_list_lock);

	jbd2_debug(3, "JBD2: commit phase 1\n");

	/*
	 * Clear the revoked flag to reflect that there are no revoked buffers
	 * in the next transaction which is going to be started.
	 */
	jbd2_clear_buffer_revoked_flags(journal);

	/*
	 * Switch to a new revoke table.
	 */
	jbd2_journal_switch_revoke_table(journal);

	write_lock(&journal->j_state_lock);
	/*
	 * Reserved credits cannot be claimed anymore, free them.
	 */
	atomic_sub(atomic_read(&journal->j_reserved_credits),
		   &commit_transaction->t_outstanding_credits);

	trace_jbd2_commit_flushing(journal, commit_transaction);
	stats.run.rs_flushing = jiffies;
	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
					     stats.run.rs_flushing);

	commit_transaction->t_state = T_FLUSH;
	journal->j_committing_transaction = commit_transaction;
	journal->j_running_transaction = NULL;
	start_time = ktime_get();
	commit_transaction->t_log_start = journal->j_head;
	wake_up_all(&journal->j_wait_transaction_locked);
	write_unlock(&journal->j_state_lock);

	jbd2_debug(3, "JBD2: commit phase 2a\n");

	/*
	 * Now start flushing things to disk, in the order they appear
	 * on the transaction lists. Data blocks go first.
	 */
	err = journal_submit_data_buffers(journal, commit_transaction);
	if (err)
		jbd2_journal_abort(journal, err);

	blk_start_plug(&plug);
	jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);

	jbd2_debug(3, "JBD2: commit phase 2b\n");

	/*
	 * Way to go: we have now written out all of the data for a
	 * transaction! Now comes the tricky part: we need to write out
	 * metadata. Loop over the transaction's entire buffer list:
	 */
	write_lock(&journal->j_state_lock);
	commit_transaction->t_state = T_COMMIT;
	write_unlock(&journal->j_state_lock);

	trace_jbd2_commit_logging(journal, commit_transaction);
	stats.run.rs_logging = jiffies;
	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
					       stats.run.rs_logging);
	stats.run.rs_blocks = commit_transaction->t_nr_buffers;
	stats.run.rs_blocks_logged = 0;

	J_ASSERT(commit_transaction->t_nr_buffers <=
		 atomic_read(&commit_transaction->t_outstanding_credits));

	err = 0;
	bufs = 0;
	descriptor = NULL;
	while (commit_transaction->t_buffers) {

		/* Find the next buffer to be journaled... */

		jh = commit_transaction->t_buffers;

		/*
		 * If we're in abort mode, we just un-journal the buffer and
		 * release it.
		 */

		if (is_journal_aborted(journal)) {
			clear_buffer_jbddirty(jh2bh(jh));
			JBUFFER_TRACE(jh, "journal is aborting: refile");
			jbd2_buffer_abort_trigger(jh,
						  jh->b_frozen_data ?
						  jh->b_frozen_triggers :
						  jh->b_triggers);
			jbd2_journal_refile_buffer(journal, jh);
			/*
			 * If that was the last one, we need to clean up
			 * any descriptor buffers which may have been
			 * already allocated, even if we are now
			 * aborting.
			 */
			if (!commit_transaction->t_buffers)
				goto start_journal_io;
			continue;
		}

		/*
		 * Make sure we have a descriptor block in which to
		 * record the metadata buffer.
		 */

		if (!descriptor) {
			J_ASSERT(bufs == 0);

			jbd2_debug(4, "JBD2: get descriptor\n");

			descriptor = jbd2_journal_get_descriptor_buffer(
							commit_transaction,
							JBD2_DESCRIPTOR_BLOCK);
			if (!descriptor) {
				jbd2_journal_abort(journal, -EIO);
				continue;
			}

			jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
				(unsigned long long)descriptor->b_blocknr,
				descriptor->b_data);
			tagp = &descriptor->b_data[sizeof(journal_header_t)];
			space_left = descriptor->b_size -
						sizeof(journal_header_t);
			first_tag = 1;
			set_buffer_jwrite(descriptor);
			set_buffer_dirty(descriptor);
			wbuf[bufs++] = descriptor;

			/*
			 * Record it so that we can wait for IO
			 * completion later.
			 */
			BUFFER_TRACE(descriptor, "ph3: file as descriptor");
			jbd2_file_log_bh(&log_bufs, descriptor);
		}
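
		/*
		 * Descriptor block layout, as built here: a journal_header_t,
		 * then a packed array of journal_block_tag_t entries (each
		 * tag_bytes long, with the first tag followed by the 16-byte
		 * journal UUID), and space reserved at the end for the
		 * jbd2_journal_block_tail checksum when csum_size != 0.
		 */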

		/* Where is the buffer to be written? */

		err = jbd2_journal_next_log_block(journal, &blocknr);
		/*
		 * If the block mapping failed, just abandon the buffer
		 * and repeat this loop: we'll fall into the
		 * refile-on-abort condition above.
		 */
		if (err) {
			jbd2_journal_abort(journal, err);
			continue;
		}

		/*
		 * start_this_handle() uses t_outstanding_credits to determine
		 * the free space in the log.
		 */
		atomic_dec(&commit_transaction->t_outstanding_credits);

		/*
		 * Bump b_count to prevent truncate from stumbling over
		 * the shadowed buffer!  @@@ This can go if we ever get
		 * rid of the shadow pairing of buffers.
		 */
		atomic_inc(&jh2bh(jh)->b_count);

		/*
		 * Make a temporary IO buffer with which to write it out
		 * (this will requeue the metadata buffer to BJ_Shadow).
		 */
		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
		JBUFFER_TRACE(jh, "ph3: write metadata");
		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
						jh, &wbuf[bufs], blocknr);
		if (flags < 0) {
			jbd2_journal_abort(journal, flags);
			continue;
		}
		jbd2_file_log_bh(&io_bufs, wbuf[bufs]);

		/*
		 * Record the new block's tag in the current descriptor
		 * buffer.
		 */

		tag_flag = 0;
		if (flags & 1)
			tag_flag |= JBD2_FLAG_ESCAPE;
		if (!first_tag)
			tag_flag |= JBD2_FLAG_SAME_UUID;

		tag = (journal_block_tag_t *) tagp;
		write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
		tag->t_flags = cpu_to_be16(tag_flag);
		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
					commit_transaction->t_tid);
		tagp += tag_bytes;
		space_left -= tag_bytes;
		bufs++;

		if (first_tag) {
			memcpy(tagp, journal->j_uuid, 16);
			tagp += 16;
			space_left -= 16;
			first_tag = 0;
		}

		/*
		 * If there's no more to do, or if the descriptor is full,
		 * let the IO rip!
		 */

		if (bufs == journal->j_wbufsize ||
		    commit_transaction->t_buffers == NULL ||
		    space_left < tag_bytes + 16 + csum_size) {

			jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);

			/*
			 * Write an end-of-descriptor marker before
			 * submitting the IOs.  "tag" still points to
			 * the last tag we set up.
			 */

			tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
start_journal_io:
			if (descriptor)
				jbd2_descriptor_block_csum_set(journal,
							       descriptor);

			for (i = 0; i < bufs; i++) {
				struct buffer_head *bh = wbuf[i];

				/* Compute the running checksum. */
				if (jbd2_has_feature_checksum(journal)) {
					crc32_sum =
					    jbd2_checksum_data(crc32_sum, bh);
				}

				lock_buffer(bh);
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				bh->b_end_io = journal_end_buffer_io_sync;
				submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);
			}
			cond_resched();

			/*
			 * Force a new descriptor to be generated next
			 * time round the loop.
			 */
			descriptor = NULL;
			bufs = 0;
		}
	}

	err = journal_finish_inode_data_buffers(journal, commit_transaction);
	if (err) {
		printk(KERN_WARNING
			"JBD2: Detected IO errors while flushing file data "
			"on %s\n", journal->j_devname);
		if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
			jbd2_journal_abort(journal, err);
		err = 0;
	}

	/*
	 * Get the current oldest transaction in the log before we issue a
	 * flush to the filesystem device. After the flush we can be sure that
	 * blocks of all older transactions are checkpointed to persistent
	 * storage and we will be safe to update the journal start in the
	 * superblock with the numbers we get here.
	 */
	update_tail =
		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);

	write_lock(&journal->j_state_lock);
	if (update_tail) {
		long freed = first_block - journal->j_tail;

		if (first_block < journal->j_tail)
			freed += journal->j_last - journal->j_first;
		/* Update tail only if we free significant amount of space */
		if (freed < jbd2_journal_get_max_txn_bufs(journal))
			update_tail = 0;
	}
	J_ASSERT(commit_transaction->t_state == T_COMMIT);
	commit_transaction->t_state = T_COMMIT_DFLUSH;
	write_unlock(&journal->j_state_lock);

	/*
	 * If the journal is not located on the file system device,
	 * then we must flush the file system device before we issue
	 * the commit record.
	 */
	if (commit_transaction->t_need_data_flush &&
	    (journal->j_fs_dev != journal->j_dev) &&
	    (journal->j_flags & JBD2_BARRIER))
		blkdev_issue_flush(journal->j_fs_dev);

	/* Done it all: now write the commit record asynchronously. */
	if (jbd2_has_feature_async_commit(journal)) {
		err = journal_submit_commit_record(journal, commit_transaction,
						   &cbh, crc32_sum);
		if (err)
			jbd2_journal_abort(journal, err);
	}

	blk_finish_plug(&plug);

	/*
	 * Lo and behold: we have just managed to send a transaction to
	 * the log.  Before we can commit it, wait for the IO so far to
	 * complete.  Control buffers being written are on the
	 * transaction's t_log_list queue, and metadata buffers are on
	 * the io_bufs list.
	 *
	 * Wait for the buffers in reverse order.  That way we are
	 * less likely to be woken up until all IOs have completed, and
	 * so we incur less scheduling load.
	 */

	jbd2_debug(3, "JBD2: commit phase 3\n");

	while (!list_empty(&io_bufs)) {
		struct buffer_head *bh = list_entry(io_bufs.prev,
						    struct buffer_head,
						    b_assoc_buffers);

		wait_on_buffer(bh);
		cond_resched();

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;
		jbd2_unfile_log_bh(bh);
		stats.run.rs_blocks_logged++;

		/*
		 * The list contains temporary buffer heads created by
		 * jbd2_journal_write_metadata_buffer().
		 */
		BUFFER_TRACE(bh, "dumping temporary bh");
		__brelse(bh);
		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
		free_buffer_head(bh);

		/* We also have to refile the corresponding shadowed buffer */
		jh = commit_transaction->t_shadow_list->b_tprev;
		bh = jh2bh(jh);
		clear_buffer_jwrite(bh);
		J_ASSERT_BH(bh, buffer_jbddirty(bh));
		J_ASSERT_BH(bh, !buffer_shadow(bh));

		/*
		 * The metadata is now released for reuse, but we need
		 * to remember it against this transaction so that when
		 * we finally commit, we can do any checkpointing
		 * required.
		 */
		JBUFFER_TRACE(jh, "file as BJ_Forget");
		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
		JBUFFER_TRACE(jh, "brelse shadowed buffer");
		__brelse(bh);
	}

	J_ASSERT(commit_transaction->t_shadow_list == NULL);

	jbd2_debug(3, "JBD2: commit phase 4\n");

	/* Here we wait for the revoke record and descriptor record buffers */
	while (!list_empty(&log_bufs)) {
		struct buffer_head *bh;

		bh = list_entry(log_bufs.prev, struct buffer_head,
				b_assoc_buffers);
		wait_on_buffer(bh);
		cond_resched();

		if (unlikely(!buffer_uptodate(bh)))
			err = -EIO;

		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
		clear_buffer_jwrite(bh);
		jbd2_unfile_log_bh(bh);
		stats.run.rs_blocks_logged++;
		__brelse(bh);		/* One for getblk */
		/* AKPM: bforget here */
	}

	if (err)
		jbd2_journal_abort(journal, err);

	jbd2_debug(3, "JBD2: commit phase 5\n");
	write_lock(&journal->j_state_lock);
	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
	commit_transaction->t_state = T_COMMIT_JFLUSH;
	write_unlock(&journal->j_state_lock);

	if (!jbd2_has_feature_async_commit(journal)) {
		err = journal_submit_commit_record(journal, commit_transaction,
						   &cbh, crc32_sum);
		if (err)
			jbd2_journal_abort(journal, err);
	}
	if (cbh)
		err = journal_wait_on_commit_record(journal, cbh);
	stats.run.rs_blocks_logged++;
	if (jbd2_has_feature_async_commit(journal) &&
	    journal->j_flags & JBD2_BARRIER) {
		blkdev_issue_flush(journal->j_dev);
	}

	if (err)
		jbd2_journal_abort(journal, err);

	WARN_ON_ONCE(
		atomic_read(&commit_transaction->t_outstanding_credits) < 0);

	/*
	 * Now the disk caches for the filesystem device are flushed, so we
	 * are safe to erase checkpointed transactions from the log by
	 * updating the journal superblock.
	 */
	if (update_tail)
		jbd2_update_log_tail(journal, first_tid, first_block);

	/*
	 * End of a transaction!  Finally, we can do checkpoint
	 * processing: any buffers committed as a result of this
	 * transaction can be removed from any checkpoint list they were on
	 * before.
	 */

	jbd2_debug(3, "JBD2: commit phase 6\n");

	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
	J_ASSERT(commit_transaction->t_buffers == NULL);
	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
	J_ASSERT(commit_transaction->t_shadow_list == NULL);

restart_loop:
	/*
	 * As there are other places (journal_unmap_buffer()) adding buffers
	 * to this list we have to be careful and hold the j_list_lock.
	 */
	spin_lock(&journal->j_list_lock);
	while (commit_transaction->t_forget) {
		transaction_t *cp_transaction;
		struct buffer_head *bh;
		int try_to_free = 0;
		bool drop_ref;

		jh = commit_transaction->t_forget;
		spin_unlock(&journal->j_list_lock);
		bh = jh2bh(jh);
		/*
		 * Get a reference so that bh cannot be freed before we are
		 * done with it.
		 */
		get_bh(bh);
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);

		/*
		 * If there is undo-protected committed data against
		 * this buffer, then we can remove it now.  If it is a
		 * buffer needing such protection, the old frozen_data
		 * field now points to a committed version of the
		 * buffer, so rotate that field to the new committed
		 * data.
		 *
		 * Otherwise, we can just throw away the frozen data now.
		 *
		 * We also know that the frozen data has already fired
		 * its triggers if they exist, so we can clear that too.
		 */
		if (jh->b_committed_data) {
			jbd2_free(jh->b_committed_data, bh->b_size);
			jh->b_committed_data = NULL;
			if (jh->b_frozen_data) {
				jh->b_committed_data = jh->b_frozen_data;
				jh->b_frozen_data = NULL;
				jh->b_frozen_triggers = NULL;
			}
		} else if (jh->b_frozen_data) {
			jbd2_free(jh->b_frozen_data, bh->b_size);
			jh->b_frozen_data = NULL;
			jh->b_frozen_triggers = NULL;
		}

		spin_lock(&journal->j_list_lock);
		cp_transaction = jh->b_cp_transaction;
		if (cp_transaction) {
			JBUFFER_TRACE(jh, "remove from old cp transaction");
			cp_transaction->t_chp_stats.cs_dropped++;
			__jbd2_journal_remove_checkpoint(jh);
		}

		/*
		 * Only re-checkpoint the buffer_head if it is marked
		 * dirty. If the buffer was added to the BJ_Forget list
		 * by jbd2_journal_forget, it may no longer be dirty and
		 * there's no point in keeping a checkpoint record for
		 * it.
		 */

		/*
		 * If a buffer has been freed while still being journaled
		 * by a previous transaction, it is refiled to BJ_Forget of
		 * the running transaction. If the just committed transaction
		 * contains an "add to orphan" operation, we can completely
		 * invalidate the buffer now. We are rather thorough in that,
		 * since the buffer may still be accessible when blocksize <
		 * pagesize and it is attached to the last partial page.
		 */
		if (buffer_freed(bh) && !jh->b_next_transaction) {
			struct address_space *mapping;

			clear_buffer_freed(bh);
			clear_buffer_jbddirty(bh);

			/*
			 * Block device buffers need to stay mapped all the
			 * time, so it is enough to clear buffer_jbddirty and
			 * buffer_freed bits. For the file mapping buffers (i.e.
			 * journalled data) we need to unmap the buffer and
			 * clear more bits. We also need to be careful about
			 * the check because the data page mapping can get
			 * cleared under our hands. Note that if mapping ==
			 * NULL, we don't need to make the buffer unmapped
			 * because the page is already detached from the
			 * mapping and buffers cannot get reused.
			 */
			mapping = READ_ONCE(bh->b_folio->mapping);
			if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
				clear_buffer_mapped(bh);
				clear_buffer_new(bh);
				clear_buffer_req(bh);
				bh->b_bdev = NULL;
			}
		}

		if (buffer_jbddirty(bh)) {
			JBUFFER_TRACE(jh, "add to new checkpointing trans");
			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
			if (is_journal_aborted(journal))
				clear_buffer_jbddirty(bh);
		} else {
			J_ASSERT_BH(bh, !buffer_dirty(bh));
			/*
			 * The buffer on BJ_Forget list and not jbddirty means
			 * it has been freed by this transaction and hence it
			 * could not have been reallocated until this
			 * transaction has committed. *BUT* it could be
			 * reallocated once we have written all the data to
			 * disk and before we process the buffer on BJ_Forget
			 * list.
			 */
			if (!jh->b_next_transaction)
				try_to_free = 1;
		}
		JBUFFER_TRACE(jh, "refile or unfile buffer");
		drop_ref = __jbd2_journal_refile_buffer(jh);
		spin_unlock(&jh->b_state_lock);
		if (drop_ref)
			jbd2_journal_put_journal_head(jh);
		if (try_to_free)
			release_buffer_page(bh);	/* Drops bh reference */
		else
			__brelse(bh);
		cond_resched_lock(&journal->j_list_lock);
	}
	spin_unlock(&journal->j_list_lock);
	/*
	 * This is a bit sleazy.  We use j_list_lock to protect transition
	 * of a transaction into T_FINISHED state and calling
	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
	 * other checkpointing code processing the transaction...
	 */
	write_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	/*
	 * Now recheck if some buffers did not get attached to the transaction
	 * while the lock was dropped...
	 */
	if (commit_transaction->t_forget) {
		spin_unlock(&journal->j_list_lock);
		write_unlock(&journal->j_state_lock);
		goto restart_loop;
	}

	/*
	 * Add the transaction to the checkpoint list
	 * (__journal_remove_checkpoint() cannot destroy the transaction
	 * under us because it is not marked as T_FINISHED yet).
	 */
	if (journal->j_checkpoint_transactions == NULL) {
		journal->j_checkpoint_transactions = commit_transaction;
		commit_transaction->t_cpnext = commit_transaction;
		commit_transaction->t_cpprev = commit_transaction;
	} else {
		commit_transaction->t_cpnext =
			journal->j_checkpoint_transactions;
		commit_transaction->t_cpprev =
			commit_transaction->t_cpnext->t_cpprev;
		commit_transaction->t_cpnext->t_cpprev =
			commit_transaction;
		commit_transaction->t_cpprev->t_cpnext =
			commit_transaction;
	}
	spin_unlock(&journal->j_list_lock);

	/* Done with this transaction! */

	jbd2_debug(3, "JBD2: commit phase 7\n");

	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);

	commit_transaction->t_start = jiffies;
	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
					      commit_transaction->t_start);

	/*
	 * File the transaction statistics
	 */
	stats.ts_tid = commit_transaction->t_tid;
	stats.run.rs_handle_count =
		atomic_read(&commit_transaction->t_handle_count);
	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
			     commit_transaction->t_tid, &stats.run);
	stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;

	commit_transaction->t_state = T_COMMIT_CALLBACK;
	J_ASSERT(commit_transaction == journal->j_committing_transaction);
	journal->j_commit_sequence = commit_transaction->t_tid;
	journal->j_committing_transaction = NULL;
	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));

	/*
	 * Weight the commit time higher than the average time so we don't
	 * react too strongly to vast changes in the commit time. This is an
	 * exponential moving average: avg = (commit_time + 3 * avg) / 4.
	 */
	if (likely(journal->j_average_commit_time))
		journal->j_average_commit_time = (commit_time +
				journal->j_average_commit_time*3) / 4;
	else
		journal->j_average_commit_time = commit_time;

	write_unlock(&journal->j_state_lock);

	if (journal->j_commit_callback)
		journal->j_commit_callback(journal, commit_transaction);
	if (journal->j_fc_cleanup_callback)
		journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);

	trace_jbd2_end_commit(journal, commit_transaction);
	jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
		   journal->j_commit_sequence, journal->j_tail_sequence);

	write_lock(&journal->j_state_lock);
	journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
	spin_lock(&journal->j_list_lock);
	commit_transaction->t_state = T_FINISHED;
	/* Check if the transaction can be dropped now that we are finished */
	if (commit_transaction->t_checkpoint_list == NULL &&
	    commit_transaction->t_checkpoint_io_list == NULL) {
		__jbd2_journal_drop_transaction(journal, commit_transaction);
		jbd2_journal_free_transaction(commit_transaction);
	}
	spin_unlock(&journal->j_list_lock);
	write_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_done_commit);
	wake_up(&journal->j_fc_wait);

	/*
	 * Calculate overall stats
	 */
	spin_lock(&journal->j_history_lock);
	journal->j_stats.ts_tid++;
	journal->j_stats.ts_requested += stats.ts_requested;
	journal->j_stats.run.rs_wait += stats.run.rs_wait;
	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
	journal->j_stats.run.rs_running += stats.run.rs_running;
	journal->j_stats.run.rs_locked += stats.run.rs_locked;
	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
	journal->j_stats.run.rs_logging += stats.run.rs_logging;
	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
	spin_unlock(&journal->j_history_lock);
}